Merge pull request #35466 from ClickHouse/black

Check python black formatting
Mikhail f. Shiryaev 2022-03-22 23:53:02 +01:00 committed by GitHub
commit 71fb04ea4a
738 changed files with 93914 additions and 44941 deletions
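Every Python file touched below was rewritten mechanically by the black formatter (single quotes become double quotes, long calls are split onto one argument per line, spaces around "=" in keyword arguments are dropped), and the style-check image and its run script gain a matching black check (see the Dockerfile and run.sh hunks near the end of the diff). Not part of the commit itself, but a minimal sketch of reproducing the check locally, assuming black is installed and the commands are run from the repository root (the precise file set is defined by the check-black helper invoked later in this diff, not shown here):

    pip3 install black          # the same formatter the style-check image installs
    black --check --diff .      # list and diff the files black would reformat (exit code 1 if any)
    black .                     # rewrite them in place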


@@ -4,11 +4,12 @@
import sys
import json
def parse_block(block=[], options=[]):
#print('block is here', block)
#show_query = False
#show_query = options.show_query
# print('block is here', block)
# show_query = False
# show_query = options.show_query
result = []
query = block[0].strip()
if len(block) > 4:
@@ -20,9 +21,9 @@ def parse_block(block=[], options=[]):
timing2 = block[2].strip().split()[1]
timing3 = block[3].strip().split()[1]
if options.show_queries:
result.append( query )
result.append(query)
if not options.show_first_timings:
result += [ timing1 , timing2, timing3 ]
result += [timing1, timing2, timing3]
else:
result.append(timing1)
return result
@@ -37,12 +38,12 @@ def read_stats_file(options, fname):
for line in f.readlines():
if 'SELECT' in line:
if "SELECT" in line:
if len(block) > 1:
result.append( parse_block(block, options) )
block = [ line ]
elif 'Time:' in line:
block.append( line )
result.append(parse_block(block, options))
block = [line]
elif "Time:" in line:
block.append(line)
return result
@@ -50,7 +51,7 @@ def read_stats_file(options, fname):
def compare_stats_files(options, arguments):
result = []
file_output = []
pyplot_colors = ['y', 'b', 'g', 'r']
pyplot_colors = ["y", "b", "g", "r"]
for fname in arguments[1:]:
file_output.append((read_stats_file(options, fname)))
if len(file_output[0]) > 0:
@@ -58,65 +59,92 @@ def compare_stats_files(options, arguments):
for idx, data_set in enumerate(file_output):
int_result = []
for timing in data_set:
int_result.append(float(timing[0])) #y values
result.append([[x for x in range(0, len(int_result)) ], int_result,
pyplot_colors[idx] + '^' ] )
# result.append([x for x in range(1, len(int_result)) ]) #x values
# result.append( pyplot_colors[idx] + '^' )
int_result.append(float(timing[0])) # y values
result.append(
[
[x for x in range(0, len(int_result))],
int_result,
pyplot_colors[idx] + "^",
]
)
# result.append([x for x in range(1, len(int_result)) ]) #x values
# result.append( pyplot_colors[idx] + '^' )
return result
def parse_args():
from optparse import OptionParser
parser = OptionParser(usage='usage: %prog [options] [result_file_path]..')
parser.add_option("-q", "--show-queries", help="Show statements along with timings", action="store_true", dest="show_queries")
parser.add_option("-f", "--show-first-timings", help="Show only first tries timings", action="store_true", dest="show_first_timings")
parser.add_option("-c", "--compare-mode", help="Prepare output for pyplot comparing result files.", action="store", dest="compare_mode")
parser = OptionParser(usage="usage: %prog [options] [result_file_path]..")
parser.add_option(
"-q",
"--show-queries",
help="Show statements along with timings",
action="store_true",
dest="show_queries",
)
parser.add_option(
"-f",
"--show-first-timings",
help="Show only first tries timings",
action="store_true",
dest="show_first_timings",
)
parser.add_option(
"-c",
"--compare-mode",
help="Prepare output for pyplot comparing result files.",
action="store",
dest="compare_mode",
)
(options, arguments) = parser.parse_args(sys.argv)
if len(arguments) < 2:
parser.print_usage()
sys.exit(1)
return ( options, arguments )
return (options, arguments)
def gen_pyplot_code(options, arguments):
result = ''
result = ""
data_sets = compare_stats_files(options, arguments)
for idx, data_set in enumerate(data_sets, start=0):
x_values, y_values, line_style = data_set
result += '\nplt.plot('
result += '%s, %s, \'%s\'' % ( x_values, y_values, line_style )
result += ', label=\'%s try\')' % idx
print('import matplotlib.pyplot as plt')
result += "\nplt.plot("
result += "%s, %s, '%s'" % (x_values, y_values, line_style)
result += ", label='%s try')" % idx
print("import matplotlib.pyplot as plt")
print(result)
print( 'plt.xlabel(\'Try number\')' )
print( 'plt.ylabel(\'Timing\')' )
print( 'plt.title(\'Benchmark query timings\')' )
print('plt.legend()')
print('plt.show()')
print("plt.xlabel('Try number')")
print("plt.ylabel('Timing')")
print("plt.title('Benchmark query timings')")
print("plt.legend()")
print("plt.show()")
def gen_html_json(options, arguments):
tuples = read_stats_file(options, arguments[1])
print('{')
print("{")
print('"system: GreenPlum(x2),')
print(('"version": "%s",' % '4.3.9.1'))
print(('"version": "%s",' % "4.3.9.1"))
print('"data_size": 10000000,')
print('"time": "",')
print('"comments": "",')
print('"result":')
print('[')
print("[")
for s in tuples:
print(s)
print(']')
print('}')
print("]")
print("}")
def main():
( options, arguments ) = parse_args()
(options, arguments) = parse_args()
if len(arguments) > 2:
gen_pyplot_code(options, arguments)
else:
gen_html_json(options, arguments)
if __name__ == '__main__':
if __name__ == "__main__":
main()


@@ -11,7 +11,7 @@ def removesuffix(text, suffix):
https://www.python.org/dev/peps/pep-0616/
"""
if suffix and text.endswith(suffix):
return text[:-len(suffix)]
return text[: -len(suffix)]
else:
return text[:]


@@ -3,55 +3,55 @@ import subprocess
import datetime
from flask import Flask, flash, request, redirect, url_for
def run_command(command, wait=False):
print("{} - execute shell command:{}".format(datetime.datetime.now(), command))
lines = []
p = subprocess.Popen(command,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
shell=True)
p = subprocess.Popen(
command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True
)
if wait:
for l in iter(p.stdout.readline, b''):
for l in iter(p.stdout.readline, b""):
lines.append(l)
p.poll()
return (lines, p.returncode)
else:
return(iter(p.stdout.readline, b''), 0)
return (iter(p.stdout.readline, b""), 0)
UPLOAD_FOLDER = './'
ALLOWED_EXTENSIONS = {'txt', 'sh'}
UPLOAD_FOLDER = "./"
ALLOWED_EXTENSIONS = {"txt", "sh"}
app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config["UPLOAD_FOLDER"] = UPLOAD_FOLDER
@app.route('/')
@app.route("/")
def hello_world():
return 'Hello World'
return "Hello World"
def allowed_file(filename):
return '.' in filename and \
filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS
@app.route('/upload', methods=['GET', 'POST'])
@app.route("/upload", methods=["GET", "POST"])
def upload_file():
if request.method == 'POST':
if request.method == "POST":
# check if the post request has the file part
if 'file' not in request.files:
flash('No file part')
if "file" not in request.files:
flash("No file part")
return redirect(request.url)
file = request.files['file']
file = request.files["file"]
# If the user does not select a file, the browser submits an
# empty file without a filename.
if file.filename == '':
flash('No selected file')
if file.filename == "":
flash("No selected file")
return redirect(request.url)
if file and allowed_file(file.filename):
filename = file.filename
file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
return redirect(url_for('upload_file', name=filename))
return '''
file.save(os.path.join(app.config["UPLOAD_FOLDER"], filename))
return redirect(url_for("upload_file", name=filename))
return """
<!doctype html>
<title>Upload new File</title>
<h1>Upload new File</h1>
@@ -59,12 +59,15 @@ def upload_file():
<input type=file name=file>
<input type=submit value=Upload>
</form>
'''
@app.route('/run', methods=['GET', 'POST'])
"""
@app.route("/run", methods=["GET", "POST"])
def parse_request():
data = request.data # data is empty
run_command(data, wait=True)
return 'Ok'
return "Ok"
if __name__ == '__main__':
app.run(port=5011)
if __name__ == "__main__":
app.run(port=5011)


@@ -19,58 +19,126 @@ import xml.etree.ElementTree as et
from threading import Thread
from scipy import stats
logging.basicConfig(format='%(asctime)s: %(levelname)s: %(module)s: %(message)s', level='WARNING')
logging.basicConfig(
format="%(asctime)s: %(levelname)s: %(module)s: %(message)s", level="WARNING"
)
total_start_seconds = time.perf_counter()
stage_start_seconds = total_start_seconds
def reportStageEnd(stage):
global stage_start_seconds, total_start_seconds
current = time.perf_counter()
print(f'stage\t{stage}\t{current - stage_start_seconds:.3f}\t{current - total_start_seconds:.3f}')
print(
f"stage\t{stage}\t{current - stage_start_seconds:.3f}\t{current - total_start_seconds:.3f}"
)
stage_start_seconds = current
def tsv_escape(s):
return s.replace('\\', '\\\\').replace('\t', '\\t').replace('\n', '\\n').replace('\r','')
return (
s.replace("\\", "\\\\")
.replace("\t", "\\t")
.replace("\n", "\\n")
.replace("\r", "")
)
parser = argparse.ArgumentParser(description='Run performance test.')
parser = argparse.ArgumentParser(description="Run performance test.")
# Explicitly decode files as UTF-8 because sometimes we have Russian characters in queries, and LANG=C is set.
parser.add_argument('file', metavar='FILE', type=argparse.FileType('r', encoding='utf-8'), nargs=1, help='test description file')
parser.add_argument('--host', nargs='*', default=['localhost'], help="Space-separated list of server hostname(s). Corresponds to '--port' options.")
parser.add_argument('--port', nargs='*', default=[9000], help="Space-separated list of server port(s). Corresponds to '--host' options.")
parser.add_argument('--runs', type=int, default=1, help='Number of query runs per server.')
parser.add_argument('--max-queries', type=int, default=None, help='Test no more than this number of queries, chosen at random.')
parser.add_argument('--queries-to-run', nargs='*', type=int, default=None, help='Space-separated list of indexes of queries to test.')
parser.add_argument('--max-query-seconds', type=int, default=15, help='For how many seconds at most a query is allowed to run. The script finishes with error if this time is exceeded.')
parser.add_argument('--prewarm-max-query-seconds', type=int, default=180, help='For how many seconds at most a prewarm (cold storage) query is allowed to run. The script finishes with error if this time is exceeded.')
parser.add_argument('--profile-seconds', type=int, default=0, help='For how many seconds to profile a query for which the performance has changed.')
parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.')
parser.add_argument('--print-queries', action='store_true', help='Print test queries and exit.')
parser.add_argument('--print-settings', action='store_true', help='Print test settings and exit.')
parser.add_argument('--keep-created-tables', action='store_true', help="Don't drop the created tables after the test.")
parser.add_argument('--use-existing-tables', action='store_true', help="Don't create or drop the tables, use the existing ones instead.")
parser.add_argument(
"file",
metavar="FILE",
type=argparse.FileType("r", encoding="utf-8"),
nargs=1,
help="test description file",
)
parser.add_argument(
"--host",
nargs="*",
default=["localhost"],
help="Space-separated list of server hostname(s). Corresponds to '--port' options.",
)
parser.add_argument(
"--port",
nargs="*",
default=[9000],
help="Space-separated list of server port(s). Corresponds to '--host' options.",
)
parser.add_argument(
"--runs", type=int, default=1, help="Number of query runs per server."
)
parser.add_argument(
"--max-queries",
type=int,
default=None,
help="Test no more than this number of queries, chosen at random.",
)
parser.add_argument(
"--queries-to-run",
nargs="*",
type=int,
default=None,
help="Space-separated list of indexes of queries to test.",
)
parser.add_argument(
"--max-query-seconds",
type=int,
default=15,
help="For how many seconds at most a query is allowed to run. The script finishes with error if this time is exceeded.",
)
parser.add_argument(
"--prewarm-max-query-seconds",
type=int,
default=180,
help="For how many seconds at most a prewarm (cold storage) query is allowed to run. The script finishes with error if this time is exceeded.",
)
parser.add_argument(
"--profile-seconds",
type=int,
default=0,
help="For how many seconds to profile a query for which the performance has changed.",
)
parser.add_argument(
"--long", action="store_true", help="Do not skip the tests tagged as long."
)
parser.add_argument(
"--print-queries", action="store_true", help="Print test queries and exit."
)
parser.add_argument(
"--print-settings", action="store_true", help="Print test settings and exit."
)
parser.add_argument(
"--keep-created-tables",
action="store_true",
help="Don't drop the created tables after the test.",
)
parser.add_argument(
"--use-existing-tables",
action="store_true",
help="Don't create or drop the tables, use the existing ones instead.",
)
args = parser.parse_args()
reportStageEnd('start')
reportStageEnd("start")
test_name = os.path.splitext(os.path.basename(args.file[0].name))[0]
tree = et.parse(args.file[0])
root = tree.getroot()
reportStageEnd('parse')
reportStageEnd("parse")
# Process query parameters
subst_elems = root.findall('substitutions/substitution')
available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... }
subst_elems = root.findall("substitutions/substitution")
available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... }
for e in subst_elems:
name = e.find('name').text
values = [v.text for v in e.findall('values/value')]
name = e.find("name").text
values = [v.text for v in e.findall("values/value")]
if not values:
raise Exception(f'No values given for substitution {{{name}}}')
raise Exception(f"No values given for substitution {{{name}}}")
available_parameters[name] = values
@@ -78,7 +146,7 @@ for e in subst_elems:
# parameters. The set of parameters is determined based on the first list.
# Note: keep the order of queries -- sometimes we have DROP IF EXISTS
# followed by CREATE in create queries section, so the order matters.
def substitute_parameters(query_templates, other_templates = []):
def substitute_parameters(query_templates, other_templates=[]):
query_results = []
other_results = [[]] * (len(other_templates))
for i, q in enumerate(query_templates):
@@ -103,17 +171,21 @@ def substitute_parameters(query_templates, other_templates = []):
# and reporting the queries marked as short.
test_queries = []
is_short = []
for e in root.findall('query'):
new_queries, [new_is_short] = substitute_parameters([e.text], [[e.attrib.get('short', '0')]])
for e in root.findall("query"):
new_queries, [new_is_short] = substitute_parameters(
[e.text], [[e.attrib.get("short", "0")]]
)
test_queries += new_queries
is_short += [eval(s) for s in new_is_short]
assert(len(test_queries) == len(is_short))
assert len(test_queries) == len(is_short)
# If we're given a list of queries to run, check that it makes sense.
for i in args.queries_to_run or []:
if i < 0 or i >= len(test_queries):
print(f'There is no query no. {i} in this test, only [{0}-{len(test_queries) - 1}] are present')
print(
f"There is no query no. {i} in this test, only [{0}-{len(test_queries) - 1}] are present"
)
exit(1)
# If we're only asked to print the queries, do that and exit.
@@ -125,60 +197,65 @@ if args.print_queries:
# Print short queries
for i, s in enumerate(is_short):
if s:
print(f'short\t{i}')
print(f"short\t{i}")
# If we're only asked to print the settings, do that and exit. These are settings
# for clickhouse-benchmark, so we print them as command line arguments, e.g.
# '--max_memory_usage=10000000'.
if args.print_settings:
for s in root.findall('settings/*'):
print(f'--{s.tag}={s.text}')
for s in root.findall("settings/*"):
print(f"--{s.tag}={s.text}")
exit(0)
# Skip long tests
if not args.long:
for tag in root.findall('.//tag'):
if tag.text == 'long':
print('skipped\tTest is tagged as long.')
for tag in root.findall(".//tag"):
if tag.text == "long":
print("skipped\tTest is tagged as long.")
sys.exit(0)
# Print report threshold for the test if it is set.
ignored_relative_change = 0.05
if 'max_ignored_relative_change' in root.attrib:
if "max_ignored_relative_change" in root.attrib:
ignored_relative_change = float(root.attrib["max_ignored_relative_change"])
print(f'report-threshold\t{ignored_relative_change}')
print(f"report-threshold\t{ignored_relative_change}")
reportStageEnd('before-connect')
reportStageEnd("before-connect")
# Open connections
servers = [{'host': host or args.host[0], 'port': port or args.port[0]} for (host, port) in itertools.zip_longest(args.host, args.port)]
servers = [
{"host": host or args.host[0], "port": port or args.port[0]}
for (host, port) in itertools.zip_longest(args.host, args.port)
]
# Force settings_is_important to fail queries on unknown settings.
all_connections = [clickhouse_driver.Client(**server, settings_is_important=True) for server in servers]
all_connections = [
clickhouse_driver.Client(**server, settings_is_important=True) for server in servers
]
for i, s in enumerate(servers):
print(f'server\t{i}\t{s["host"]}\t{s["port"]}')
reportStageEnd('connect')
reportStageEnd("connect")
if not args.use_existing_tables:
# Run drop queries, ignoring errors. Do this before all other activity,
# because clickhouse_driver disconnects on error (this is not configurable),
# and the new connection loses the changes in settings.
drop_query_templates = [q.text for q in root.findall('drop_query')]
drop_query_templates = [q.text for q in root.findall("drop_query")]
drop_queries = substitute_parameters(drop_query_templates)
for conn_index, c in enumerate(all_connections):
for q in drop_queries:
try:
c.execute(q)
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
print(f"drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}")
except:
pass
reportStageEnd('drop-1')
reportStageEnd("drop-1")
# Apply settings.
settings = root.findall('settings/*')
settings = root.findall("settings/*")
for conn_index, c in enumerate(all_connections):
for s in settings:
# requires clickhouse-driver >= 1.1.5 to accept arbitrary new settings
@@ -189,48 +266,52 @@ for conn_index, c in enumerate(all_connections):
# the test, which is wrong.
c.execute("select 1")
reportStageEnd('settings')
reportStageEnd("settings")
# Check tables that should exist. If they don't exist, just skip this test.
tables = [e.text for e in root.findall('preconditions/table_exists')]
tables = [e.text for e in root.findall("preconditions/table_exists")]
for t in tables:
for c in all_connections:
try:
res = c.execute("select 1 from {} limit 1".format(t))
except:
exception_message = traceback.format_exception_only(*sys.exc_info()[:2])[-1]
skipped_message = ' '.join(exception_message.split('\n')[:2])
print(f'skipped\t{tsv_escape(skipped_message)}')
skipped_message = " ".join(exception_message.split("\n")[:2])
print(f"skipped\t{tsv_escape(skipped_message)}")
sys.exit(0)
reportStageEnd('preconditions')
reportStageEnd("preconditions")
if not args.use_existing_tables:
# Run create and fill queries. We will run them simultaneously for both
# servers, to save time. The weird XML search + filter is because we want to
# keep the relative order of elements, and etree doesn't support the
# appropriate xpath query.
create_query_templates = [q.text for q in root.findall('./*')
if q.tag in ('create_query', 'fill_query')]
create_query_templates = [
q.text for q in root.findall("./*") if q.tag in ("create_query", "fill_query")
]
create_queries = substitute_parameters(create_query_templates)
# Disallow temporary tables, because the clickhouse_driver reconnects on
# errors, and temporary tables are destroyed. We want to be able to continue
# after some errors.
for q in create_queries:
if re.search('create temporary table', q, flags=re.IGNORECASE):
print(f"Temporary tables are not allowed in performance tests: '{q}'",
file = sys.stderr)
if re.search("create temporary table", q, flags=re.IGNORECASE):
print(
f"Temporary tables are not allowed in performance tests: '{q}'",
file=sys.stderr,
)
sys.exit(1)
def do_create(connection, index, queries):
for q in queries:
connection.execute(q)
print(f'create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}')
print(f"create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}")
threads = [
Thread(target = do_create, args = (connection, index, create_queries))
for index, connection in enumerate(all_connections)]
Thread(target=do_create, args=(connection, index, create_queries))
for index, connection in enumerate(all_connections)
]
for t in threads:
t.start()
@@ -238,14 +319,16 @@ if not args.use_existing_tables:
for t in threads:
t.join()
reportStageEnd('create')
reportStageEnd("create")
# By default, test all queries.
queries_to_run = range(0, len(test_queries))
if args.max_queries:
# If specified, test a limited number of queries chosen at random.
queries_to_run = random.sample(range(0, len(test_queries)), min(len(test_queries), args.max_queries))
queries_to_run = random.sample(
range(0, len(test_queries)), min(len(test_queries), args.max_queries)
)
if args.queries_to_run:
# Run the specified queries.
@@ -255,16 +338,16 @@ if args.queries_to_run:
profile_total_seconds = 0
for query_index in queries_to_run:
q = test_queries[query_index]
query_prefix = f'{test_name}.query{query_index}'
query_prefix = f"{test_name}.query{query_index}"
# We have some crazy long queries (about 100kB), so trim them to a sane
# length. This means we can't use query text as an identifier and have to
# use the test name + the test-wide query index.
query_display_name = q
if len(query_display_name) > 1000:
query_display_name = f'{query_display_name[:1000]}...({query_index})'
query_display_name = f"{query_display_name[:1000]}...({query_index})"
print(f'display-name\t{query_index}\t{tsv_escape(query_display_name)}')
print(f"display-name\t{query_index}\t{tsv_escape(query_display_name)}")
# Prewarm: run once on both servers. Helps to bring the data into memory,
# precompile the queries, etc.
@@ -272,10 +355,10 @@ for query_index in queries_to_run:
# new one. We want to run them on the new server only, so that the PR author
# can ensure that the test works properly. Remember the errors we had on
# each server.
query_error_on_connection = [None] * len(all_connections);
query_error_on_connection = [None] * len(all_connections)
for conn_index, c in enumerate(all_connections):
try:
prewarm_id = f'{query_prefix}.prewarm0'
prewarm_id = f"{query_prefix}.prewarm0"
try:
# During the warmup runs, we will also:
@@ -283,25 +366,30 @@ for query_index in queries_to_run:
# * collect profiler traces, which might be helpful for analyzing
# test coverage. We disable profiler for normal runs because
# it makes the results unstable.
res = c.execute(q, query_id = prewarm_id,
settings = {
'max_execution_time': args.prewarm_max_query_seconds,
'query_profiler_real_time_period_ns': 10000000,
'memory_profiler_step': '4Mi',
})
res = c.execute(
q,
query_id=prewarm_id,
settings={
"max_execution_time": args.prewarm_max_query_seconds,
"query_profiler_real_time_period_ns": 10000000,
"memory_profiler_step": "4Mi",
},
)
except clickhouse_driver.errors.Error as e:
# Add query id to the exception to make debugging easier.
e.args = (prewarm_id, *e.args)
e.message = prewarm_id + ': ' + e.message
e.message = prewarm_id + ": " + e.message
raise
print(f'prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}')
print(
f"prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}"
)
except KeyboardInterrupt:
raise
except:
# FIXME the driver reconnects on error and we lose settings, so this
# might lead to further errors or unexpected behavior.
query_error_on_connection[conn_index] = traceback.format_exc();
query_error_on_connection[conn_index] = traceback.format_exc()
continue
# Report all errors that occurred during prewarm and decide what to do next.
@@ -311,14 +399,14 @@ for query_index in queries_to_run:
no_errors = []
for i, e in enumerate(query_error_on_connection):
if e:
print(e, file = sys.stderr)
print(e, file=sys.stderr)
else:
no_errors.append(i)
if len(no_errors) == 0:
continue
elif len(no_errors) < len(all_connections):
print(f'partial\t{query_index}\t{no_errors}')
print(f"partial\t{query_index}\t{no_errors}")
this_query_connections = [all_connections[index] for index in no_errors]
@@ -337,27 +425,34 @@ for query_index in queries_to_run:
all_server_times.append([])
while True:
run_id = f'{query_prefix}.run{run}'
run_id = f"{query_prefix}.run{run}"
for conn_index, c in enumerate(this_query_connections):
try:
res = c.execute(q, query_id = run_id, settings = {'max_execution_time': args.max_query_seconds})
res = c.execute(
q,
query_id=run_id,
settings={"max_execution_time": args.max_query_seconds},
)
except clickhouse_driver.errors.Error as e:
# Add query id to the exception to make debugging easier.
e.args = (run_id, *e.args)
e.message = run_id + ': ' + e.message
e.message = run_id + ": " + e.message
raise
elapsed = c.last_query.elapsed
all_server_times[conn_index].append(elapsed)
server_seconds += elapsed
print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{elapsed}')
print(f"query\t{query_index}\t{run_id}\t{conn_index}\t{elapsed}")
if elapsed > args.max_query_seconds:
# Do not stop processing pathologically slow queries,
# since this may hide errors in other queries.
print(f'The query no. {query_index} is taking too long to run ({elapsed} s)', file=sys.stderr)
print(
f"The query no. {query_index} is taking too long to run ({elapsed} s)",
file=sys.stderr,
)
# Be careful with the counter, after this line it's the next iteration
# already.
@@ -386,7 +481,7 @@ for query_index in queries_to_run:
break
client_seconds = time.perf_counter() - start_seconds
print(f'client-time\t{query_index}\t{client_seconds}\t{server_seconds}')
print(f"client-time\t{query_index}\t{client_seconds}\t{server_seconds}")
# Run additional profiling queries to collect profile data, but only if test times appeared to be different.
# We have to do it after normal runs because otherwise it will affect test statistics too much
@@ -397,13 +492,15 @@ for query_index in queries_to_run:
# Don't fail if for some reason there are not enough measurements.
continue
pvalue = stats.ttest_ind(all_server_times[0], all_server_times[1], equal_var = False).pvalue
pvalue = stats.ttest_ind(
all_server_times[0], all_server_times[1], equal_var=False
).pvalue
median = [statistics.median(t) for t in all_server_times]
# Keep this consistent with the value used in report. Should eventually move
# to (median[1] - median[0]) / min(median), which is compatible with "times"
# difference we use in report (max(median) / min(median)).
relative_diff = (median[1] - median[0]) / median[0]
print(f'diff\t{query_index}\t{median[0]}\t{median[1]}\t{relative_diff}\t{pvalue}')
print(f"diff\t{query_index}\t{median[0]}\t{median[1]}\t{relative_diff}\t{pvalue}")
if abs(relative_diff) < ignored_relative_change or pvalue > 0.05:
continue
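To make the threshold check above concrete (illustrative numbers, not taken from any real run): with a median of 1.00 s on the old server and 1.12 s on the new one, relative_diff = (1.12 - 1.00) / 1.00 = 0.12, which exceeds the default ignored_relative_change of 0.05, so the profiling runs below are performed provided the t-test p-value is also at or below 0.05.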
@@ -412,25 +509,31 @@ for query_index in queries_to_run:
profile_start_seconds = time.perf_counter()
run = 0
while time.perf_counter() - profile_start_seconds < args.profile_seconds:
run_id = f'{query_prefix}.profile{run}'
run_id = f"{query_prefix}.profile{run}"
for conn_index, c in enumerate(this_query_connections):
try:
res = c.execute(q, query_id = run_id, settings = {'query_profiler_real_time_period_ns': 10000000})
print(f'profile\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}')
res = c.execute(
q,
query_id=run_id,
settings={"query_profiler_real_time_period_ns": 10000000},
)
print(
f"profile\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}"
)
except clickhouse_driver.errors.Error as e:
# Add query id to the exception to make debugging easier.
e.args = (run_id, *e.args)
e.message = run_id + ': ' + e.message
e.message = run_id + ": " + e.message
raise
run += 1
profile_total_seconds += time.perf_counter() - profile_start_seconds
print(f'profile-total\t{profile_total_seconds}')
print(f"profile-total\t{profile_total_seconds}")
reportStageEnd('run')
reportStageEnd("run")
# Run drop queries
if not args.keep_created_tables and not args.use_existing_tables:
@@ -438,6 +541,6 @@ if not args.keep_created_tables and not args.use_existing_tables:
for conn_index, c in enumerate(all_connections):
for q in drop_queries:
c.execute(q)
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
print(f"drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}")
reportStageEnd('drop-2')
reportStageEnd("drop-2")


@@ -12,9 +12,13 @@ import pprint
import sys
import traceback
parser = argparse.ArgumentParser(description='Create performance test report')
parser.add_argument('--report', default='main', choices=['main', 'all-queries'],
help='Which report to build')
parser = argparse.ArgumentParser(description="Create performance test report")
parser.add_argument(
"--report",
default="main",
choices=["main", "all-queries"],
help="Which report to build",
)
args = parser.parse_args()
tables = []
@@ -31,8 +35,8 @@ unstable_partial_queries = 0
# max seconds to run one query by itself, not counting preparation
allowed_single_run_time = 2
color_bad='#ffb0c0'
color_good='#b0d050'
color_bad = "#ffb0c0"
color_good = "#b0d050"
header_template = """
<!DOCTYPE html>
@@ -151,24 +155,29 @@ tr:nth-child(odd) td {{filter: brightness(90%);}}
table_anchor = 0
row_anchor = 0
def currentTableAnchor():
global table_anchor
return f'{table_anchor}'
return f"{table_anchor}"
def newTableAnchor():
global table_anchor
table_anchor += 1
return currentTableAnchor()
def currentRowAnchor():
global row_anchor
global table_anchor
return f'{table_anchor}.{row_anchor}'
return f"{table_anchor}.{row_anchor}"
def nextRowAnchor():
global row_anchor
global table_anchor
return f'{table_anchor}.{row_anchor + 1}'
return f"{table_anchor}.{row_anchor + 1}"
def advanceRowAnchor():
global row_anchor
@@ -178,43 +187,58 @@ def advanceRowAnchor():
def tr(x, anchor=None):
#return '<tr onclick="location.href=\'#{a}\'" id={a}>{x}</tr>'.format(a=a, x=str(x))
# return '<tr onclick="location.href=\'#{a}\'" id={a}>{x}</tr>'.format(a=a, x=str(x))
anchor = anchor if anchor else advanceRowAnchor()
return f'<tr id={anchor}>{x}</tr>'
return f"<tr id={anchor}>{x}</tr>"
def td(value, cell_attributes = ''):
return '<td {cell_attributes}>{value}</td>'.format(
cell_attributes = cell_attributes,
value = value)
def th(value, cell_attributes = ''):
return '<th {cell_attributes}>{value}</th>'.format(
cell_attributes = cell_attributes,
value = value)
def td(value, cell_attributes=""):
return "<td {cell_attributes}>{value}</td>".format(
cell_attributes=cell_attributes, value=value
)
def tableRow(cell_values, cell_attributes = [], anchor=None):
def th(value, cell_attributes=""):
return "<th {cell_attributes}>{value}</th>".format(
cell_attributes=cell_attributes, value=value
)
def tableRow(cell_values, cell_attributes=[], anchor=None):
return tr(
''.join([td(v, a)
for v, a in itertools.zip_longest(
cell_values, cell_attributes,
fillvalue = '')
if a is not None and v is not None]),
anchor)
"".join(
[
td(v, a)
for v, a in itertools.zip_longest(
cell_values, cell_attributes, fillvalue=""
)
if a is not None and v is not None
]
),
anchor,
)
def tableHeader(cell_values, cell_attributes = []):
def tableHeader(cell_values, cell_attributes=[]):
return tr(
''.join([th(v, a)
for v, a in itertools.zip_longest(
cell_values, cell_attributes,
fillvalue = '')
if a is not None and v is not None]))
"".join(
[
th(v, a)
for v, a in itertools.zip_longest(
cell_values, cell_attributes, fillvalue=""
)
if a is not None and v is not None
]
)
)
def tableStart(title):
cls = '-'.join(title.lower().split(' ')[:3]);
cls = "-".join(title.lower().split(" ")[:3])
global table_anchor
table_anchor = cls
anchor = currentTableAnchor()
help_anchor = '-'.join(title.lower().split(' '));
help_anchor = "-".join(title.lower().split(" "))
return f"""
<h2 id="{anchor}">
<a class="cancela" href="#{anchor}">{title}</a>
@@ -223,12 +247,14 @@ def tableStart(title):
<table class="{cls}">
"""
def tableEnd():
return '</table>'
return "</table>"
def tsvRows(n):
try:
with open(n, encoding='utf-8') as fd:
with open(n, encoding="utf-8") as fd:
result = []
for row in csv.reader(fd, delimiter="\t", quoting=csv.QUOTE_NONE):
new_row = []
@@ -237,27 +263,32 @@ def tsvRows(n):
# The second one (encode('latin1').decode('utf-8')) fixes the changes with unicode vs utf-8 chars, so
# 'Чем зÐнимаеÑЬÑÑ' is transformed back into 'Чем зАнимаешЬся'.
new_row.append(e.encode('utf-8').decode('unicode-escape').encode('latin1').decode('utf-8'))
new_row.append(
e.encode("utf-8")
.decode("unicode-escape")
.encode("latin1")
.decode("utf-8")
)
result.append(new_row)
return result
except:
report_errors.append(
traceback.format_exception_only(
*sys.exc_info()[:2])[-1])
report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1])
pass
return []
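The encode/decode chain inside tsvRows() is easy to misread, so here is a minimal, self-contained illustration of the same round-trip with a hypothetical sample value (not taken from any report): a cell that arrives as literal backslash escapes of UTF-8 bytes is first interpreted as escape sequences, which yields one mojibake character per byte, and re-encoding those characters as latin-1 recovers the raw bytes so they can finally be decoded as UTF-8.

    # Hypothetical sample cell: the text \xd0\xa7\xd0\xb5\xd0\xbc, backslashes included.
    escaped = r"\xd0\xa7\xd0\xb5\xd0\xbc"
    fixed = (
        escaped.encode("utf-8")      # ASCII bytes of the escape sequences
        .decode("unicode-escape")    # interpret \xNN escapes: one mojibake char per byte
        .encode("latin1")            # map those chars back to the original UTF-8 bytes
        .decode("utf-8")             # decode the bytes as UTF-8
    )
    assert fixed == "Чем"            # the Cyrillic word is recovered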
def htmlRows(n):
rawRows = tsvRows(n)
result = ''
result = ""
for row in rawRows:
result += tableRow(row)
return result
def addSimpleTable(caption, columns, rows, pos=None):
global tables
text = ''
text = ""
if not rows:
return
@@ -268,51 +299,63 @@ def addSimpleTable(caption, columns, rows, pos=None):
text += tableEnd()
tables.insert(pos if pos else len(tables), text)
def add_tested_commits():
global report_errors
try:
addSimpleTable('Tested Commits', ['Old', 'New'],
[['<pre>{}</pre>'.format(x) for x in
[open('left-commit.txt').read(),
open('right-commit.txt').read()]]])
addSimpleTable(
"Tested Commits",
["Old", "New"],
[
[
"<pre>{}</pre>".format(x)
for x in [
open("left-commit.txt").read(),
open("right-commit.txt").read(),
]
]
],
)
except:
# Don't fail if no commit info -- maybe it's a manual run.
report_errors.append(
traceback.format_exception_only(
*sys.exc_info()[:2])[-1])
report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1])
pass
def add_report_errors():
global tables
global report_errors
# Add the errors reported by various steps of comparison script
try:
report_errors += [l.strip() for l in open('report/errors.log')]
report_errors += [l.strip() for l in open("report/errors.log")]
except:
report_errors.append(
traceback.format_exception_only(
*sys.exc_info()[:2])[-1])
report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1])
pass
if not report_errors:
return
text = tableStart('Errors while Building the Report')
text += tableHeader(['Error'])
text = tableStart("Errors while Building the Report")
text += tableHeader(["Error"])
for x in report_errors:
text += tableRow([x])
text += tableEnd()
# Insert after Tested Commits
tables.insert(1, text)
errors_explained.append([f'<a href="#{currentTableAnchor()}">There were some errors while building the report</a>']);
errors_explained.append(
[
f'<a href="#{currentTableAnchor()}">There were some errors while building the report</a>'
]
)
def add_errors_explained():
if not errors_explained:
return
text = '<a name="fail1"/>'
text += tableStart('Error Summary')
text += tableHeader(['Description'])
text += tableStart("Error Summary")
text += tableHeader(["Description"])
for row in errors_explained:
text += tableRow(row)
text += tableEnd()
@@ -321,59 +364,81 @@ def add_errors_explained():
tables.insert(1, text)
if args.report == 'main':
if args.report == "main":
print((header_template.format()))
add_tested_commits()
run_error_rows = tsvRows('run-errors.tsv')
run_error_rows = tsvRows("run-errors.tsv")
error_tests += len(run_error_rows)
addSimpleTable('Run Errors', ['Test', 'Error'], run_error_rows)
addSimpleTable("Run Errors", ["Test", "Error"], run_error_rows)
if run_error_rows:
errors_explained.append([f'<a href="#{currentTableAnchor()}">There were some errors while running the tests</a>']);
errors_explained.append(
[
f'<a href="#{currentTableAnchor()}">There were some errors while running the tests</a>'
]
)
slow_on_client_rows = tsvRows('report/slow-on-client.tsv')
slow_on_client_rows = tsvRows("report/slow-on-client.tsv")
error_tests += len(slow_on_client_rows)
addSimpleTable('Slow on Client',
['Client time,&nbsp;s', 'Server time,&nbsp;s', 'Ratio', 'Test', 'Query'],
slow_on_client_rows)
addSimpleTable(
"Slow on Client",
["Client time,&nbsp;s", "Server time,&nbsp;s", "Ratio", "Test", "Query"],
slow_on_client_rows,
)
if slow_on_client_rows:
errors_explained.append([f'<a href="#{currentTableAnchor()}">Some queries are taking noticeable time client-side (missing `FORMAT Null`?)</a>']);
errors_explained.append(
[
f'<a href="#{currentTableAnchor()}">Some queries are taking noticeable time client-side (missing `FORMAT Null`?)</a>'
]
)
unmarked_short_rows = tsvRows('report/unexpected-query-duration.tsv')
unmarked_short_rows = tsvRows("report/unexpected-query-duration.tsv")
error_tests += len(unmarked_short_rows)
addSimpleTable('Unexpected Query Duration',
['Problem', 'Marked as "short"?', 'Run time, s', 'Test', '#', 'Query'],
unmarked_short_rows)
addSimpleTable(
"Unexpected Query Duration",
["Problem", 'Marked as "short"?', "Run time, s", "Test", "#", "Query"],
unmarked_short_rows,
)
if unmarked_short_rows:
errors_explained.append([f'<a href="#{currentTableAnchor()}">Some queries have unexpected duration</a>']);
errors_explained.append(
[
f'<a href="#{currentTableAnchor()}">Some queries have unexpected duration</a>'
]
)
def add_partial():
rows = tsvRows('report/partial-queries-report.tsv')
rows = tsvRows("report/partial-queries-report.tsv")
if not rows:
return
global unstable_partial_queries, slow_average_tests, tables
text = tableStart('Partial Queries')
columns = ['Median time, s', 'Relative time variance', 'Test', '#', 'Query']
text = tableStart("Partial Queries")
columns = ["Median time, s", "Relative time variance", "Test", "#", "Query"]
text += tableHeader(columns)
attrs = ['' for c in columns]
attrs = ["" for c in columns]
for row in rows:
anchor = f'{currentTableAnchor()}.{row[2]}.{row[3]}'
anchor = f"{currentTableAnchor()}.{row[2]}.{row[3]}"
if float(row[1]) > 0.10:
attrs[1] = f'style="background: {color_bad}"'
unstable_partial_queries += 1
errors_explained.append([f'<a href="#{anchor}">The query no. {row[3]} of test \'{row[2]}\' has excessive variance of run time. Keep it below 10%</a>'])
errors_explained.append(
[
f"<a href=\"#{anchor}\">The query no. {row[3]} of test '{row[2]}' has excessive variance of run time. Keep it below 10%</a>"
]
)
else:
attrs[1] = ''
attrs[1] = ""
if float(row[0]) > allowed_single_run_time:
attrs[0] = f'style="background: {color_bad}"'
errors_explained.append([f'<a href="#{anchor}">The query no. {row[3]} of test \'{row[2]}\' is taking too long to run. Keep the run time below {allowed_single_run_time} seconds"</a>'])
errors_explained.append(
[
f'<a href="#{anchor}">The query no. {row[3]} of test \'{row[2]}\' is taking too long to run. Keep the run time below {allowed_single_run_time} seconds"</a>'
]
)
slow_average_tests += 1
else:
attrs[0] = ''
attrs[0] = ""
text += tableRow(row, attrs, anchor)
text += tableEnd()
tables.append(text)
@@ -381,41 +446,45 @@ if args.report == 'main':
add_partial()
def add_changes():
rows = tsvRows('report/changed-perf.tsv')
rows = tsvRows("report/changed-perf.tsv")
if not rows:
return
global faster_queries, slower_queries, tables
text = tableStart('Changes in Performance')
text = tableStart("Changes in Performance")
columns = [
'Old,&nbsp;s', # 0
'New,&nbsp;s', # 1
'Ratio of speedup&nbsp;(-) or slowdown&nbsp;(+)', # 2
'Relative difference (new&nbsp;&minus;&nbsp;old) / old', # 3
'p&nbsp;<&nbsp;0.01 threshold', # 4
'', # Failed # 5
'Test', # 6
'#', # 7
'Query', # 8
]
attrs = ['' for c in columns]
"Old,&nbsp;s", # 0
"New,&nbsp;s", # 1
"Ratio of speedup&nbsp;(-) or slowdown&nbsp;(+)", # 2
"Relative difference (new&nbsp;&minus;&nbsp;old) / old", # 3
"p&nbsp;<&nbsp;0.01 threshold", # 4
"", # Failed # 5
"Test", # 6
"#", # 7
"Query", # 8
]
attrs = ["" for c in columns]
attrs[5] = None
text += tableHeader(columns, attrs)
for row in rows:
anchor = f'{currentTableAnchor()}.{row[6]}.{row[7]}'
anchor = f"{currentTableAnchor()}.{row[6]}.{row[7]}"
if int(row[5]):
if float(row[3]) < 0.:
if float(row[3]) < 0.0:
faster_queries += 1
attrs[2] = attrs[3] = f'style="background: {color_good}"'
else:
slower_queries += 1
attrs[2] = attrs[3] = f'style="background: {color_bad}"'
errors_explained.append([f'<a href="#{anchor}">The query no. {row[7]} of test \'{row[6]}\' has slowed down</a>'])
errors_explained.append(
[
f"<a href=\"#{anchor}\">The query no. {row[7]} of test '{row[6]}' has slowed down</a>"
]
)
else:
attrs[2] = attrs[3] = ''
attrs[2] = attrs[3] = ""
text += tableRow(row, attrs, anchor)
@@ -427,35 +496,35 @@ if args.report == 'main':
def add_unstable_queries():
global unstable_queries, very_unstable_queries, tables
unstable_rows = tsvRows('report/unstable-queries.tsv')
unstable_rows = tsvRows("report/unstable-queries.tsv")
if not unstable_rows:
return
unstable_queries += len(unstable_rows)
columns = [
'Old,&nbsp;s', #0
'New,&nbsp;s', #1
'Relative difference (new&nbsp;-&nbsp;old)/old', #2
'p&nbsp;&lt;&nbsp;0.01 threshold', #3
'', # Failed #4
'Test', #5
'#', #6
'Query' #7
"Old,&nbsp;s", # 0
"New,&nbsp;s", # 1
"Relative difference (new&nbsp;-&nbsp;old)/old", # 2
"p&nbsp;&lt;&nbsp;0.01 threshold", # 3
"", # Failed #4
"Test", # 5
"#", # 6
"Query", # 7
]
attrs = ['' for c in columns]
attrs = ["" for c in columns]
attrs[4] = None
text = tableStart('Unstable Queries')
text = tableStart("Unstable Queries")
text += tableHeader(columns, attrs)
for r in unstable_rows:
anchor = f'{currentTableAnchor()}.{r[5]}.{r[6]}'
anchor = f"{currentTableAnchor()}.{r[5]}.{r[6]}"
if int(r[4]):
very_unstable_queries += 1
attrs[3] = f'style="background: {color_bad}"'
else:
attrs[3] = ''
attrs[3] = ""
# Just don't add the slightly unstable queries we don't consider
# errors. It's not clear what the user should do with them.
continue
@@ -470,53 +539,70 @@ if args.report == 'main':
add_unstable_queries()
skipped_tests_rows = tsvRows('analyze/skipped-tests.tsv')
addSimpleTable('Skipped Tests', ['Test', 'Reason'], skipped_tests_rows)
skipped_tests_rows = tsvRows("analyze/skipped-tests.tsv")
addSimpleTable("Skipped Tests", ["Test", "Reason"], skipped_tests_rows)
addSimpleTable('Test Performance Changes',
['Test', 'Ratio of speedup&nbsp;(-) or slowdown&nbsp;(+)', 'Queries', 'Total not OK', 'Changed perf', 'Unstable'],
tsvRows('report/test-perf-changes.tsv'))
addSimpleTable(
"Test Performance Changes",
[
"Test",
"Ratio of speedup&nbsp;(-) or slowdown&nbsp;(+)",
"Queries",
"Total not OK",
"Changed perf",
"Unstable",
],
tsvRows("report/test-perf-changes.tsv"),
)
def add_test_times():
global slow_average_tests, tables
rows = tsvRows('report/test-times.tsv')
rows = tsvRows("report/test-times.tsv")
if not rows:
return
columns = [
'Test', #0
'Wall clock time, entire test,&nbsp;s', #1
'Total client time for measured query runs,&nbsp;s', #2
'Queries', #3
'Longest query, total for measured runs,&nbsp;s', #4
'Wall clock time per query,&nbsp;s', #5
'Shortest query, total for measured runs,&nbsp;s', #6
'', # Runs #7
]
attrs = ['' for c in columns]
"Test", # 0
"Wall clock time, entire test,&nbsp;s", # 1
"Total client time for measured query runs,&nbsp;s", # 2
"Queries", # 3
"Longest query, total for measured runs,&nbsp;s", # 4
"Wall clock time per query,&nbsp;s", # 5
"Shortest query, total for measured runs,&nbsp;s", # 6
"", # Runs #7
]
attrs = ["" for c in columns]
attrs[7] = None
text = tableStart('Test Times')
text = tableStart("Test Times")
text += tableHeader(columns, attrs)
allowed_average_run_time = 3.75 # 60 seconds per test at (7 + 1) * 2 runs
allowed_average_run_time = 3.75 # 60 seconds per test at (7 + 1) * 2 runs
for r in rows:
anchor = f'{currentTableAnchor()}.{r[0]}'
anchor = f"{currentTableAnchor()}.{r[0]}"
total_runs = (int(r[7]) + 1) * 2 # one prewarm run, two servers
if r[0] != 'Total' and float(r[5]) > allowed_average_run_time * total_runs:
if r[0] != "Total" and float(r[5]) > allowed_average_run_time * total_runs:
# FIXME should be 15s max -- investigate parallel_insert
slow_average_tests += 1
attrs[5] = f'style="background: {color_bad}"'
errors_explained.append([f'<a href="#{anchor}">The test \'{r[0]}\' is too slow to run as a whole. Investigate whether the create and fill queries can be sped up'])
errors_explained.append(
[
f"<a href=\"#{anchor}\">The test '{r[0]}' is too slow to run as a whole. Investigate whether the create and fill queries can be sped up"
]
)
else:
attrs[5] = ''
attrs[5] = ""
if r[0] != 'Total' and float(r[4]) > allowed_single_run_time * total_runs:
if r[0] != "Total" and float(r[4]) > allowed_single_run_time * total_runs:
slow_average_tests += 1
attrs[4] = f'style="background: {color_bad}"'
errors_explained.append([f'<a href="./all-queries.html#all-query-times.{r[0]}.0">Some query of the test \'{r[0]}\' is too slow to run. See the all queries report'])
errors_explained.append(
[
f"<a href=\"./all-queries.html#all-query-times.{r[0]}.0\">Some query of the test '{r[0]}' is too slow to run. See the all queries report"
]
)
else:
attrs[4] = ''
attrs[4] = ""
text += tableRow(r, attrs, anchor)
@@ -525,10 +611,17 @@ if args.report == 'main':
add_test_times()
addSimpleTable('Metric Changes',
['Metric', 'Old median value', 'New median value',
'Relative difference', 'Times difference'],
tsvRows('metrics/changes.tsv'))
addSimpleTable(
"Metric Changes",
[
"Metric",
"Old median value",
"New median value",
"Relative difference",
"Times difference",
],
tsvRows("metrics/changes.tsv"),
)
add_report_errors()
add_errors_explained()
@@ -536,7 +629,8 @@ if args.report == 'main':
for t in tables:
print(t)
print(f"""
print(
f"""
</div>
<p class="links">
<a href="all-queries.html">All queries</a>
@@ -546,104 +640,111 @@ if args.report == 'main':
</p>
</body>
</html>
""")
"""
)
status = 'success'
message = 'See the report'
status = "success"
message = "See the report"
message_array = []
if slow_average_tests:
status = 'failure'
message_array.append(str(slow_average_tests) + ' too long')
status = "failure"
message_array.append(str(slow_average_tests) + " too long")
if faster_queries:
message_array.append(str(faster_queries) + ' faster')
message_array.append(str(faster_queries) + " faster")
if slower_queries:
if slower_queries > 3:
status = 'failure'
message_array.append(str(slower_queries) + ' slower')
status = "failure"
message_array.append(str(slower_queries) + " slower")
if unstable_partial_queries:
very_unstable_queries += unstable_partial_queries
status = 'failure'
status = "failure"
# Don't show mildly unstable queries, only the very unstable ones we
# treat as errors.
if very_unstable_queries:
if very_unstable_queries > 5:
error_tests += very_unstable_queries
status = 'failure'
message_array.append(str(very_unstable_queries) + ' unstable')
status = "failure"
message_array.append(str(very_unstable_queries) + " unstable")
error_tests += slow_average_tests
if error_tests:
status = 'failure'
message_array.insert(0, str(error_tests) + ' errors')
status = "failure"
message_array.insert(0, str(error_tests) + " errors")
if message_array:
message = ', '.join(message_array)
message = ", ".join(message_array)
if report_errors:
status = 'failure'
message = 'Errors while building the report.'
status = "failure"
message = "Errors while building the report."
print(("""
print(
(
"""
<!--status: {status}-->
<!--message: {message}-->
""".format(status=status, message=message)))
""".format(
status=status, message=message
)
)
)
elif args.report == 'all-queries':
elif args.report == "all-queries":
print((header_template.format()))
add_tested_commits()
def add_all_queries():
rows = tsvRows('report/all-queries.tsv')
rows = tsvRows("report/all-queries.tsv")
if not rows:
return
columns = [
'', # Changed #0
'', # Unstable #1
'Old,&nbsp;s', #2
'New,&nbsp;s', #3
'Ratio of speedup&nbsp;(-) or slowdown&nbsp;(+)', #4
'Relative difference (new&nbsp;&minus;&nbsp;old) / old', #5
'p&nbsp;&lt;&nbsp;0.01 threshold', #6
'Test', #7
'#', #8
'Query', #9
]
attrs = ['' for c in columns]
"", # Changed #0
"", # Unstable #1
"Old,&nbsp;s", # 2
"New,&nbsp;s", # 3
"Ratio of speedup&nbsp;(-) or slowdown&nbsp;(+)", # 4
"Relative difference (new&nbsp;&minus;&nbsp;old) / old", # 5
"p&nbsp;&lt;&nbsp;0.01 threshold", # 6
"Test", # 7
"#", # 8
"Query", # 9
]
attrs = ["" for c in columns]
attrs[0] = None
attrs[1] = None
text = tableStart('All Query Times')
text = tableStart("All Query Times")
text += tableHeader(columns, attrs)
for r in rows:
anchor = f'{currentTableAnchor()}.{r[7]}.{r[8]}'
anchor = f"{currentTableAnchor()}.{r[7]}.{r[8]}"
if int(r[1]):
attrs[6] = f'style="background: {color_bad}"'
else:
attrs[6] = ''
attrs[6] = ""
if int(r[0]):
if float(r[5]) > 0.:
if float(r[5]) > 0.0:
attrs[4] = attrs[5] = f'style="background: {color_bad}"'
else:
attrs[4] = attrs[5] = f'style="background: {color_good}"'
else:
attrs[4] = attrs[5] = ''
attrs[4] = attrs[5] = ""
if (float(r[2]) + float(r[3])) / 2 > allowed_single_run_time:
attrs[2] = f'style="background: {color_bad}"'
attrs[3] = f'style="background: {color_bad}"'
else:
attrs[2] = ''
attrs[3] = ''
attrs[2] = ""
attrs[3] = ""
text += tableRow(r, attrs, anchor)
@@ -655,7 +756,8 @@ elif args.report == 'all-queries':
for t in tables:
print(t)
print(f"""
print(
f"""
</div>
<p class="links">
<a href="report.html">Main report</a>
@@ -665,4 +767,5 @@ elif args.report == 'all-queries':
</p>
</body>
</html>
""")
"""
)


@@ -7,18 +7,19 @@ import csv
RESULT_LOG_NAME = "run.log"
def process_result(result_folder):
status = "success"
description = 'Server started and responded'
description = "Server started and responded"
summary = [("Smoke test", "OK")]
with open(os.path.join(result_folder, RESULT_LOG_NAME), 'r') as run_log:
lines = run_log.read().split('\n')
if not lines or lines[0].strip() != 'OK':
with open(os.path.join(result_folder, RESULT_LOG_NAME), "r") as run_log:
lines = run_log.read().split("\n")
if not lines or lines[0].strip() != "OK":
status = "failure"
logging.info("Lines is not ok: %s", str('\n'.join(lines)))
logging.info("Lines is not ok: %s", str("\n".join(lines)))
summary = [("Smoke test", "FAIL")]
description = 'Server failed to respond, see result in logs'
description = "Server failed to respond, see result in logs"
result_logs = []
server_log_path = os.path.join(result_folder, "clickhouse-server.log")
@@ -38,20 +39,22 @@ def process_result(result_folder):
def write_results(results_file, status_file, results, status):
with open(results_file, 'w') as f:
out = csv.writer(f, delimiter='\t')
with open(results_file, "w") as f:
out = csv.writer(f, delimiter="\t")
out.writerows(results)
with open(status_file, 'w') as f:
out = csv.writer(f, delimiter='\t')
with open(status_file, "w") as f:
out = csv.writer(f, delimiter="\t")
out.writerow(status)
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of split build smoke test")
parser.add_argument("--in-results-dir", default='/test_output/')
parser.add_argument("--out-results-file", default='/test_output/test_results.tsv')
parser.add_argument("--out-status-file", default='/test_output/check_status.tsv')
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
parser = argparse.ArgumentParser(
description="ClickHouse script for parsing results of split build smoke test"
)
parser.add_argument("--in-results-dir", default="/test_output/")
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
args = parser.parse_args()
state, description, test_results, logs = process_result(args.in_results_dir)


@@ -10,11 +10,18 @@ def process_result(result_folder):
status = "success"
summary = []
paths = []
tests = ["TLPWhere", "TLPGroupBy", "TLPHaving", "TLPWhereGroupBy", "TLPDistinct", "TLPAggregate"]
tests = [
"TLPWhere",
"TLPGroupBy",
"TLPHaving",
"TLPWhereGroupBy",
"TLPDistinct",
"TLPAggregate",
]
for test in tests:
err_path = '{}/{}.err'.format(result_folder, test)
out_path = '{}/{}.out'.format(result_folder, test)
err_path = "{}/{}.err".format(result_folder, test)
out_path = "{}/{}.out".format(result_folder, test)
if not os.path.exists(err_path):
logging.info("No output err on path %s", err_path)
summary.append((test, "SKIPPED"))
@@ -23,24 +30,24 @@ def process_result(result_folder):
else:
paths.append(err_path)
paths.append(out_path)
with open(err_path, 'r') as f:
if 'AssertionError' in f.read():
with open(err_path, "r") as f:
if "AssertionError" in f.read():
summary.append((test, "FAIL"))
status = 'failure'
status = "failure"
else:
summary.append((test, "OK"))
logs_path = '{}/logs.tar.gz'.format(result_folder)
logs_path = "{}/logs.tar.gz".format(result_folder)
if not os.path.exists(logs_path):
logging.info("No logs tar on path %s", logs_path)
else:
paths.append(logs_path)
stdout_path = '{}/stdout.log'.format(result_folder)
stdout_path = "{}/stdout.log".format(result_folder)
if not os.path.exists(stdout_path):
logging.info("No stdout log on path %s", stdout_path)
else:
paths.append(stdout_path)
stderr_path = '{}/stderr.log'.format(result_folder)
stderr_path = "{}/stderr.log".format(result_folder)
if not os.path.exists(stderr_path):
logging.info("No stderr log on path %s", stderr_path)
else:
@@ -52,20 +59,22 @@ def process_result(result_folder):
def write_results(results_file, status_file, results, status):
with open(results_file, 'w') as f:
out = csv.writer(f, delimiter='\t')
with open(results_file, "w") as f:
out = csv.writer(f, delimiter="\t")
out.writerows(results)
with open(status_file, 'w') as f:
out = csv.writer(f, delimiter='\t')
with open(status_file, "w") as f:
out = csv.writer(f, delimiter="\t")
out.writerow(status)
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of sqlancer test")
parser.add_argument("--in-results-dir", default='/test_output/')
parser.add_argument("--out-results-file", default='/test_output/test_results.tsv')
parser.add_argument("--out-status-file", default='/test_output/check_status.tsv')
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
parser = argparse.ArgumentParser(
description="ClickHouse script for parsing results of sqlancer test"
)
parser.add_argument("--in-results-dir", default="/test_output/")
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
args = parser.parse_args()
state, description, test_results, logs = process_result(args.in_results_dir)


@@ -16,7 +16,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
python3-pip \
shellcheck \
yamllint \
&& pip3 install codespell PyGithub boto3 unidiff dohq-artifactory
&& pip3 install black boto3 codespell dohq-artifactory PyGithub unidiff
# Architecture of the image when BuildKit/buildx is used
ARG TARGETARCH


@@ -14,6 +14,7 @@ def process_result(result_folder):
("header duplicates", "duplicate_output.txt"),
("shellcheck", "shellcheck_output.txt"),
("style", "style_output.txt"),
("black", "black_output.txt"),
("typos", "typos_output.txt"),
("whitespaces", "whitespaces_output.txt"),
("workflows", "workflows_output.txt"),


@@ -7,6 +7,8 @@ echo "Check duplicates" | ts
./check-duplicate-includes.sh |& tee /test_output/duplicate_output.txt
echo "Check style" | ts
./check-style -n |& tee /test_output/style_output.txt
echo "Check python formatting with black" | ts
./check-black -n |& tee /test_output/black_output.txt
echo "Check typos" | ts
./check-typos |& tee /test_output/typos_output.txt
echo "Check whitespaces" | ts


@@ -22,9 +22,9 @@ def process_result(result_folder):
total_other = 0
test_results = []
for test in results["tests"]:
test_name = test['test']['test_name']
test_result = test['result']['result_type'].upper()
test_time = str(test['result']['message_rtime'])
test_name = test["test"]["test_name"]
test_result = test["result"]["result_type"].upper()
test_time = str(test["result"]["message_rtime"])
total_tests += 1
if test_result == "OK":
total_ok += 1
@@ -39,24 +39,29 @@ def process_result(result_folder):
else:
status = "success"
description = "failed: {}, passed: {}, other: {}".format(total_fail, total_ok, total_other)
description = "failed: {}, passed: {}, other: {}".format(
total_fail, total_ok, total_other
)
return status, description, test_results, [json_path, test_binary_log]
def write_results(results_file, status_file, results, status):
with open(results_file, 'w') as f:
out = csv.writer(f, delimiter='\t')
with open(results_file, "w") as f:
out = csv.writer(f, delimiter="\t")
out.writerows(results)
with open(status_file, 'w') as f:
out = csv.writer(f, delimiter='\t')
with open(status_file, "w") as f:
out = csv.writer(f, delimiter="\t")
out.writerow(status)
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of Testflows tests")
parser.add_argument("--in-results-dir", default='./')
parser.add_argument("--out-results-file", default='./test_results.tsv')
parser.add_argument("--out-status-file", default='./check_status.tsv')
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
parser = argparse.ArgumentParser(
description="ClickHouse script for parsing results of Testflows tests"
)
parser.add_argument("--in-results-dir", default="./")
parser.add_argument("--out-results-file", default="./test_results.tsv")
parser.add_argument("--out-status-file", default="./check_status.tsv")
args = parser.parse_args()
state, description, test_results, logs = process_result(args.in_results_dir)
@ -64,4 +69,3 @@ if __name__ == "__main__":
status = (state, description)
write_results(args.out_results_file, args.out_status_file, test_results, status)
logging.info("Result written")

View File

@ -5,24 +5,26 @@ import logging
import argparse
import csv
OK_SIGN = 'OK ]'
FAILED_SIGN = 'FAILED ]'
SEGFAULT = 'Segmentation fault'
SIGNAL = 'received signal SIG'
PASSED = 'PASSED'
OK_SIGN = "OK ]"
FAILED_SIGN = "FAILED ]"
SEGFAULT = "Segmentation fault"
SIGNAL = "received signal SIG"
PASSED = "PASSED"
def get_test_name(line):
elements = reversed(line.split(' '))
elements = reversed(line.split(" "))
for element in elements:
if '(' not in element and ')' not in element:
if "(" not in element and ")" not in element:
return element
raise Exception("No test name in line '{}'".format(line))
def process_result(result_folder):
summary = []
total_counter = 0
failed_counter = 0
result_log_path = '{}/test_result.txt'.format(result_folder)
result_log_path = "{}/test_result.txt".format(result_folder)
if not os.path.exists(result_log_path):
logging.info("No output log on path %s", result_log_path)
return "exception", "No output log", []
@ -30,7 +32,7 @@ def process_result(result_folder):
status = "success"
description = ""
passed = False
with open(result_log_path, 'r') as test_result:
with open(result_log_path, "r") as test_result:
for line in test_result:
if OK_SIGN in line:
logging.info("Found ok line: '%s'", line)
@ -38,7 +40,7 @@ def process_result(result_folder):
logging.info("Test name: '%s'", test_name)
summary.append((test_name, "OK"))
total_counter += 1
elif FAILED_SIGN in line and 'listed below' not in line and 'ms)' in line:
elif FAILED_SIGN in line and "listed below" not in line and "ms)" in line:
logging.info("Found fail line: '%s'", line)
test_name = get_test_name(line.strip())
logging.info("Test name: '%s'", test_name)
@ -67,25 +69,30 @@ def process_result(result_folder):
status = "failure"
if not description:
description += "fail: {}, passed: {}".format(failed_counter, total_counter - failed_counter)
description += "fail: {}, passed: {}".format(
failed_counter, total_counter - failed_counter
)
return status, description, summary
def write_results(results_file, status_file, results, status):
with open(results_file, 'w') as f:
out = csv.writer(f, delimiter='\t')
with open(results_file, "w") as f:
out = csv.writer(f, delimiter="\t")
out.writerows(results)
with open(status_file, 'w') as f:
out = csv.writer(f, delimiter='\t')
with open(status_file, "w") as f:
out = csv.writer(f, delimiter="\t")
out.writerow(status)
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of unit tests")
parser.add_argument("--in-results-dir", default='/test_output/')
parser.add_argument("--out-results-file", default='/test_output/test_results.tsv')
parser.add_argument("--out-status-file", default='/test_output/check_status.tsv')
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
parser = argparse.ArgumentParser(
description="ClickHouse script for parsing results of unit tests"
)
parser.add_argument("--in-results-dir", default="/test_output/")
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
args = parser.parse_args()
state, description, test_results = process_result(args.in_results_dir)
@ -93,4 +100,3 @@ if __name__ == "__main__":
status = (state, description)
write_results(args.out_results_file, args.out_status_file, test_results, status)
logging.info("Result written")

View File

@ -16,6 +16,7 @@ NO_TASK_TIMEOUT_SIGNS = ["All tests have finished", "No tests were run"]
RETRIES_SIGN = "Some tests were restarted"
def process_test_log(log_path):
total = 0
skipped = 0
@ -26,7 +27,7 @@ def process_test_log(log_path):
retries = False
task_timeout = True
test_results = []
with open(log_path, 'r') as test_file:
with open(log_path, "r") as test_file:
for line in test_file:
original_line = line
line = line.strip()
@ -36,12 +37,15 @@ def process_test_log(log_path):
hung = True
if RETRIES_SIGN in line:
retries = True
if any(sign in line for sign in (OK_SIGN, FAIL_SIGN, UNKNOWN_SIGN, SKIPPED_SIGN)):
test_name = line.split(' ')[2].split(':')[0]
if any(
sign in line
for sign in (OK_SIGN, FAIL_SIGN, UNKNOWN_SIGN, SKIPPED_SIGN)
):
test_name = line.split(" ")[2].split(":")[0]
test_time = ''
test_time = ""
try:
time_token = line.split(']')[1].strip().split()[0]
time_token = line.split("]")[1].strip().split()[0]
float(time_token)
test_time = time_token
except:
@ -66,9 +70,22 @@ def process_test_log(log_path):
elif len(test_results) > 0 and test_results[-1][1] == "FAIL":
test_results[-1][3].append(original_line)
test_results = [(test[0], test[1], test[2], ''.join(test[3])) for test in test_results]
test_results = [
(test[0], test[1], test[2], "".join(test[3])) for test in test_results
]
return (
total,
skipped,
unknown,
failed,
success,
hung,
task_timeout,
retries,
test_results,
)
return total, skipped, unknown, failed, success, hung, task_timeout, retries, test_results
def process_result(result_path):
test_results = []
@ -76,16 +93,26 @@ def process_result(result_path):
description = ""
files = os.listdir(result_path)
if files:
logging.info("Find files in result folder %s", ','.join(files))
result_path = os.path.join(result_path, 'test_result.txt')
logging.info("Find files in result folder %s", ",".join(files))
result_path = os.path.join(result_path, "test_result.txt")
else:
result_path = None
description = "No output log"
state = "error"
if result_path and os.path.exists(result_path):
total, skipped, unknown, failed, success, hung, task_timeout, retries, test_results = process_test_log(result_path)
is_flacky_check = 1 < int(os.environ.get('NUM_TRIES', 1))
(
total,
skipped,
unknown,
failed,
success,
hung,
task_timeout,
retries,
test_results,
) = process_test_log(result_path)
is_flacky_check = 1 < int(os.environ.get("NUM_TRIES", 1))
logging.info("Is flacky check: %s", is_flacky_check)
# If no tests were run (success == 0) it indicates an error (e.g. server did not start or crashed immediately)
# But it's Ok for "flaky checks" - they can contain just one test for check which is marked as skipped.
@ -120,20 +147,22 @@ def process_result(result_path):
def write_results(results_file, status_file, results, status):
with open(results_file, 'w') as f:
out = csv.writer(f, delimiter='\t')
with open(results_file, "w") as f:
out = csv.writer(f, delimiter="\t")
out.writerows(results)
with open(status_file, 'w') as f:
out = csv.writer(f, delimiter='\t')
with open(status_file, "w") as f:
out = csv.writer(f, delimiter="\t")
out.writerow(status)
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of functional tests")
parser.add_argument("--in-results-dir", default='/test_output/')
parser.add_argument("--out-results-file", default='/test_output/test_results.tsv')
parser.add_argument("--out-status-file", default='/test_output/check_status.tsv')
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
parser = argparse.ArgumentParser(
description="ClickHouse script for parsing results of functional tests"
)
parser.add_argument("--in-results-dir", default="/test_output/")
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
args = parser.parse_args()
state, description, test_results = process_result(args.in_results_dir)

View File

@ -71,6 +71,8 @@ This check means that the CI system started to process the pull request. When it
Performs some simple regex-based checks of code style, using the [`utils/check-style/check-style`](https://github.com/ClickHouse/ClickHouse/blob/master/utils/check-style/check-style) binary (note that it can be run locally).
If it fails, fix the style errors following the [code style guide](style.md).
Python code is checked with [black](https://github.com/psf/black/).
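To reproduce the check locally before pushing, you can run `black` yourself (the style-check image installs it via `pip3 install black`, as the Dockerfile change above shows). Below is a minimal sketch of such a local check; the `check_black` helper is illustrative and not part of the repository; in CI the equivalent work is done by the `check-black` script, whose output lands in `black_output.txt`.

```python
# Minimal local sketch: run black in check-only mode and report whether any
# Python files would be reformatted. The helper name and default path are
# illustrative; CI uses the repository's own check-black script instead.
import subprocess
import sys


def check_black(path: str = ".") -> int:
    # --check makes black exit with a non-zero code instead of rewriting files;
    # --diff prints the changes it would make.
    result = subprocess.run(
        [sys.executable, "-m", "black", "--check", "--diff", path],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        print(result.stdout)
    return result.returncode


if __name__ == "__main__":
    sys.exit(check_black())
```

Running `python3 -m black .` without `--check` applies the formatting in place, which is usually the quickest way to make this check pass.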
### Report Details
- [Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check.html)
- `output.txt` contains the resulting check errors (invalid tabulation, etc.); a blank page means no errors. [Successful result example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check/output.txt).

View File

@ -15,24 +15,24 @@ import website
def prepare_amp_html(lang, args, root, site_temp, main_site_dir):
src_path = root
src_index = os.path.join(src_path, 'index.html')
src_index = os.path.join(src_path, "index.html")
rel_path = os.path.relpath(src_path, site_temp)
dst_path = os.path.join(main_site_dir, rel_path, 'amp')
dst_index = os.path.join(dst_path, 'index.html')
dst_path = os.path.join(main_site_dir, rel_path, "amp")
dst_index = os.path.join(dst_path, "index.html")
logging.debug(f'Generating AMP version for {rel_path} ({lang})')
logging.debug(f"Generating AMP version for {rel_path} ({lang})")
os.makedirs(dst_path)
with open(src_index, 'r') as f:
with open(src_index, "r") as f:
content = f.read()
css_in = ' '.join(website.get_css_in(args))
css_in = " ".join(website.get_css_in(args))
command = f"purifycss --min {css_in} '{src_index}'"
logging.debug(command)
inline_css = subprocess.check_output(command, shell=True).decode('utf-8')
inline_css = inline_css.replace('!important', '').replace('/*!', '/*')
inline_css = subprocess.check_output(command, shell=True).decode("utf-8")
inline_css = inline_css.replace("!important", "").replace("/*!", "/*")
inline_css = cssmin.cssmin(inline_css)
content = content.replace('CUSTOM_CSS_PLACEHOLDER', inline_css)
content = content.replace("CUSTOM_CSS_PLACEHOLDER", inline_css)
with open(dst_index, 'w') as f:
with open(dst_index, "w") as f:
f.write(content)
return dst_index
@ -40,15 +40,12 @@ def prepare_amp_html(lang, args, root, site_temp, main_site_dir):
def build_amp(lang, args, cfg):
# AMP docs: https://amp.dev/documentation/
logging.info(f'Building AMP version for {lang}')
logging.info(f"Building AMP version for {lang}")
with util.temp_dir() as site_temp:
extra = cfg.data['extra']
main_site_dir = cfg.data['site_dir']
extra['is_amp'] = True
cfg.load_dict({
'site_dir': site_temp,
'extra': extra
})
extra = cfg.data["extra"]
main_site_dir = cfg.data["site_dir"]
extra["is_amp"] = True
cfg.load_dict({"site_dir": site_temp, "extra": extra})
try:
mkdocs.commands.build.build(cfg)
@ -60,50 +57,49 @@ def build_amp(lang, args, cfg):
paths = []
for root, _, filenames in os.walk(site_temp):
if 'index.html' in filenames:
paths.append(prepare_amp_html(lang, args, root, site_temp, main_site_dir))
logging.info(f'Finished building AMP version for {lang}')
if "index.html" in filenames:
paths.append(
prepare_amp_html(lang, args, root, site_temp, main_site_dir)
)
logging.info(f"Finished building AMP version for {lang}")
def html_to_amp(content):
soup = bs4.BeautifulSoup(
content,
features='html.parser'
)
soup = bs4.BeautifulSoup(content, features="html.parser")
for tag in soup.find_all():
if tag.attrs.get('id') == 'tostring':
tag.attrs['id'] = '_tostring'
if tag.name == 'img':
tag.name = 'amp-img'
tag.attrs['layout'] = 'responsive'
src = tag.attrs['src']
if not (src.startswith('/') or src.startswith('http')):
tag.attrs['src'] = f'../{src}'
if not tag.attrs.get('width'):
tag.attrs['width'] = '640'
if not tag.attrs.get('height'):
tag.attrs['height'] = '320'
if tag.name == 'iframe':
tag.name = 'amp-iframe'
tag.attrs['layout'] = 'responsive'
del tag.attrs['alt']
del tag.attrs['allowfullscreen']
if not tag.attrs.get('width'):
tag.attrs['width'] = '640'
if not tag.attrs.get('height'):
tag.attrs['height'] = '320'
elif tag.name == 'a':
href = tag.attrs.get('href')
if tag.attrs.get("id") == "tostring":
tag.attrs["id"] = "_tostring"
if tag.name == "img":
tag.name = "amp-img"
tag.attrs["layout"] = "responsive"
src = tag.attrs["src"]
if not (src.startswith("/") or src.startswith("http")):
tag.attrs["src"] = f"../{src}"
if not tag.attrs.get("width"):
tag.attrs["width"] = "640"
if not tag.attrs.get("height"):
tag.attrs["height"] = "320"
if tag.name == "iframe":
tag.name = "amp-iframe"
tag.attrs["layout"] = "responsive"
del tag.attrs["alt"]
del tag.attrs["allowfullscreen"]
if not tag.attrs.get("width"):
tag.attrs["width"] = "640"
if not tag.attrs.get("height"):
tag.attrs["height"] = "320"
elif tag.name == "a":
href = tag.attrs.get("href")
if href:
if not (href.startswith('/') or href.startswith('http')):
if '#' in href:
href, anchor = href.split('#')
if not (href.startswith("/") or href.startswith("http")):
if "#" in href:
href, anchor = href.split("#")
else:
anchor = None
href = f'../{href}amp/'
href = f"../{href}amp/"
if anchor:
href = f'{href}#{anchor}'
tag.attrs['href'] = href
href = f"{href}#{anchor}"
tag.attrs["href"] = href
content = str(soup)
return website.minify_html(content)

View File

@ -17,54 +17,52 @@ import util
def build_for_lang(lang, args):
logging.info(f'Building {lang} blog')
logging.info(f"Building {lang} blog")
try:
theme_cfg = {
'name': None,
'custom_dir': os.path.join(os.path.dirname(__file__), '..', args.theme_dir),
'language': lang,
'direction': 'ltr',
'static_templates': ['404.html'],
'extra': {
'now': int(time.mktime(datetime.datetime.now().timetuple())) # TODO better way to avoid caching
}
"name": None,
"custom_dir": os.path.join(os.path.dirname(__file__), "..", args.theme_dir),
"language": lang,
"direction": "ltr",
"static_templates": ["404.html"],
"extra": {
"now": int(
time.mktime(datetime.datetime.now().timetuple())
) # TODO better way to avoid caching
},
}
# the following list of languages is sorted according to
# https://en.wikipedia.org/wiki/List_of_languages_by_total_number_of_speakers
languages = {
'en': 'English'
}
languages = {"en": "English"}
site_names = {
'en': 'ClickHouse Blog'
}
site_names = {"en": "ClickHouse Blog"}
assert len(site_names) == len(languages)
site_dir = os.path.join(args.blog_output_dir, lang)
plugins = ['macros']
plugins = ["macros"]
if args.htmlproofer:
plugins.append('htmlproofer')
plugins.append("htmlproofer")
website_url = 'https://clickhouse.com'
site_name = site_names.get(lang, site_names['en'])
website_url = "https://clickhouse.com"
site_name = site_names.get(lang, site_names["en"])
blog_nav, post_meta = nav.build_blog_nav(lang, args)
raw_config = dict(
site_name=site_name,
site_url=f'{website_url}/blog/{lang}/',
site_url=f"{website_url}/blog/{lang}/",
docs_dir=os.path.join(args.blog_dir, lang),
site_dir=site_dir,
strict=True,
theme=theme_cfg,
nav=blog_nav,
copyright='©2016–2022 ClickHouse, Inc.',
copyright="©2016–2022 ClickHouse, Inc.",
use_directory_urls=True,
repo_name='ClickHouse/ClickHouse',
repo_url='https://github.com/ClickHouse/ClickHouse/',
edit_uri=f'edit/master/website/blog/{lang}',
repo_name="ClickHouse/ClickHouse",
repo_url="https://github.com/ClickHouse/ClickHouse/",
edit_uri=f"edit/master/website/blog/{lang}",
markdown_extensions=mdx_clickhouse.MARKDOWN_EXTENSIONS,
plugins=plugins,
extra=dict(
@ -75,12 +73,12 @@ def build_for_lang(lang, args):
website_url=website_url,
events=args.events,
languages=languages,
includes_dir=os.path.join(os.path.dirname(__file__), '..', '_includes'),
includes_dir=os.path.join(os.path.dirname(__file__), "..", "_includes"),
is_amp=False,
is_blog=True,
post_meta=post_meta,
today=datetime.date.today().isoformat()
)
today=datetime.date.today().isoformat(),
),
)
cfg = config.load_config(**raw_config)
@ -89,21 +87,28 @@ def build_for_lang(lang, args):
redirects.build_blog_redirects(args)
env = util.init_jinja2_env(args)
with open(os.path.join(args.website_dir, 'templates', 'blog', 'rss.xml'), 'rb') as f:
rss_template_string = f.read().decode('utf-8').strip()
with open(
os.path.join(args.website_dir, "templates", "blog", "rss.xml"), "rb"
) as f:
rss_template_string = f.read().decode("utf-8").strip()
rss_template = env.from_string(rss_template_string)
with open(os.path.join(args.blog_output_dir, lang, 'rss.xml'), 'w') as f:
f.write(rss_template.render({'config': raw_config}))
with open(os.path.join(args.blog_output_dir, lang, "rss.xml"), "w") as f:
f.write(rss_template.render({"config": raw_config}))
logging.info(f'Finished building {lang} blog')
logging.info(f"Finished building {lang} blog")
except exceptions.ConfigurationError as e:
raise SystemExit('\n' + str(e))
raise SystemExit("\n" + str(e))
def build_blog(args):
tasks = []
for lang in args.blog_lang.split(','):
for lang in args.blog_lang.split(","):
if lang:
tasks.append((lang, args,))
tasks.append(
(
lang,
args,
)
)
util.run_function_in_parallel(build_for_lang, tasks, threads=False)

View File

@ -30,76 +30,76 @@ import website
from cmake_in_clickhouse_generator import generate_cmake_flags_files
class ClickHouseMarkdown(markdown.extensions.Extension):
class ClickHousePreprocessor(markdown.util.Processor):
def run(self, lines):
for line in lines:
if '<!--hide-->' not in line:
if "<!--hide-->" not in line:
yield line
def extendMarkdown(self, md):
md.preprocessors.register(self.ClickHousePreprocessor(), 'clickhouse_preprocessor', 31)
md.preprocessors.register(
self.ClickHousePreprocessor(), "clickhouse_preprocessor", 31
)
markdown.extensions.ClickHouseMarkdown = ClickHouseMarkdown
def build_for_lang(lang, args):
logging.info(f'Building {lang} docs')
os.environ['SINGLE_PAGE'] = '0'
logging.info(f"Building {lang} docs")
os.environ["SINGLE_PAGE"] = "0"
try:
theme_cfg = {
'name': None,
'custom_dir': os.path.join(os.path.dirname(__file__), '..', args.theme_dir),
'language': lang,
'direction': 'rtl' if lang == 'fa' else 'ltr',
'static_templates': ['404.html'],
'extra': {
'now': int(time.mktime(datetime.datetime.now().timetuple())) # TODO better way to avoid caching
}
"name": None,
"custom_dir": os.path.join(os.path.dirname(__file__), "..", args.theme_dir),
"language": lang,
"direction": "rtl" if lang == "fa" else "ltr",
"static_templates": ["404.html"],
"extra": {
"now": int(
time.mktime(datetime.datetime.now().timetuple())
) # TODO better way to avoid caching
},
}
# the following list of languages is sorted according to
# https://en.wikipedia.org/wiki/List_of_languages_by_total_number_of_speakers
languages = {
'en': 'English',
'zh': '中文',
'ru': 'Русский',
'ja': '日本語'
}
languages = {"en": "English", "zh": "中文", "ru": "Русский", "ja": "日本語"}
site_names = {
'en': 'ClickHouse %s Documentation',
'zh': 'ClickHouse文档 %s',
'ru': 'Документация ClickHouse %s',
'ja': 'ClickHouseドキュメント %s'
"en": "ClickHouse %s Documentation",
"zh": "ClickHouse文档 %s",
"ru": "Документация ClickHouse %s",
"ja": "ClickHouseドキュメント %s",
}
assert len(site_names) == len(languages)
site_dir = os.path.join(args.docs_output_dir, lang)
plugins = ['macros']
plugins = ["macros"]
if args.htmlproofer:
plugins.append('htmlproofer')
plugins.append("htmlproofer")
website_url = 'https://clickhouse.com'
site_name = site_names.get(lang, site_names['en']) % ''
site_name = site_name.replace('  ', ' ')
website_url = "https://clickhouse.com"
site_name = site_names.get(lang, site_names["en"]) % ""
site_name = site_name.replace(" ", " ")
raw_config = dict(
site_name=site_name,
site_url=f'{website_url}/docs/{lang}/',
site_url=f"{website_url}/docs/{lang}/",
docs_dir=os.path.join(args.docs_dir, lang),
site_dir=site_dir,
strict=True,
theme=theme_cfg,
copyright='©2016–2022 ClickHouse, Inc.',
copyright="©2016–2022 ClickHouse, Inc.",
use_directory_urls=True,
repo_name='ClickHouse/ClickHouse',
repo_url='https://github.com/ClickHouse/ClickHouse/',
edit_uri=f'edit/master/docs/{lang}',
repo_name="ClickHouse/ClickHouse",
repo_url="https://github.com/ClickHouse/ClickHouse/",
edit_uri=f"edit/master/docs/{lang}",
markdown_extensions=mdx_clickhouse.MARKDOWN_EXTENSIONS,
plugins=plugins,
extra=dict(
@ -111,16 +111,16 @@ def build_for_lang(lang, args):
website_url=website_url,
events=args.events,
languages=languages,
includes_dir=os.path.join(os.path.dirname(__file__), '..', '_includes'),
includes_dir=os.path.join(os.path.dirname(__file__), "..", "_includes"),
is_amp=False,
is_blog=False
)
is_blog=False,
),
)
# Clean to be safe if last build finished abnormally
single_page.remove_temporary_files(lang, args)
raw_config['nav'] = nav.build_docs_nav(lang, args)
raw_config["nav"] = nav.build_docs_nav(lang, args)
cfg = config.load_config(**raw_config)
@ -131,21 +131,28 @@ def build_for_lang(lang, args):
amp.build_amp(lang, args, cfg)
if not args.skip_single_page:
single_page.build_single_page_version(lang, args, raw_config.get('nav'), cfg)
single_page.build_single_page_version(
lang, args, raw_config.get("nav"), cfg
)
mdx_clickhouse.PatchedMacrosPlugin.disabled = False
logging.info(f'Finished building {lang} docs')
logging.info(f"Finished building {lang} docs")
except exceptions.ConfigurationError as e:
raise SystemExit('\n' + str(e))
raise SystemExit("\n" + str(e))
def build_docs(args):
tasks = []
for lang in args.lang.split(','):
for lang in args.lang.split(","):
if lang:
tasks.append((lang, args,))
tasks.append(
(
lang,
args,
)
)
util.run_function_in_parallel(build_for_lang, tasks, threads=False)
redirects.build_docs_redirects(args)
@ -171,56 +178,64 @@ def build(args):
redirects.build_static_redirects(args)
if __name__ == '__main__':
os.chdir(os.path.join(os.path.dirname(__file__), '..'))
if __name__ == "__main__":
os.chdir(os.path.join(os.path.dirname(__file__), ".."))
# A root path to ClickHouse source code.
src_dir = '..'
src_dir = ".."
website_dir = os.path.join(src_dir, 'website')
website_dir = os.path.join(src_dir, "website")
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('--lang', default='en,ru,zh,ja')
arg_parser.add_argument('--blog-lang', default='en')
arg_parser.add_argument('--docs-dir', default='.')
arg_parser.add_argument('--theme-dir', default=website_dir)
arg_parser.add_argument('--website-dir', default=website_dir)
arg_parser.add_argument('--src-dir', default=src_dir)
arg_parser.add_argument('--blog-dir', default=os.path.join(website_dir, 'blog'))
arg_parser.add_argument('--output-dir', default='build')
arg_parser.add_argument('--nav-limit', type=int, default='0')
arg_parser.add_argument('--skip-multi-page', action='store_true')
arg_parser.add_argument('--skip-single-page', action='store_true')
arg_parser.add_argument('--skip-amp', action='store_true')
arg_parser.add_argument('--skip-website', action='store_true')
arg_parser.add_argument('--skip-blog', action='store_true')
arg_parser.add_argument('--skip-git-log', action='store_true')
arg_parser.add_argument('--skip-docs', action='store_true')
arg_parser.add_argument('--test-only', action='store_true')
arg_parser.add_argument('--minify', action='store_true')
arg_parser.add_argument('--htmlproofer', action='store_true')
arg_parser.add_argument('--no-docs-macros', action='store_true')
arg_parser.add_argument('--save-raw-single-page', type=str)
arg_parser.add_argument('--livereload', type=int, default='0')
arg_parser.add_argument('--verbose', action='store_true')
arg_parser.add_argument("--lang", default="en,ru,zh,ja")
arg_parser.add_argument("--blog-lang", default="en")
arg_parser.add_argument("--docs-dir", default=".")
arg_parser.add_argument("--theme-dir", default=website_dir)
arg_parser.add_argument("--website-dir", default=website_dir)
arg_parser.add_argument("--src-dir", default=src_dir)
arg_parser.add_argument("--blog-dir", default=os.path.join(website_dir, "blog"))
arg_parser.add_argument("--output-dir", default="build")
arg_parser.add_argument("--nav-limit", type=int, default="0")
arg_parser.add_argument("--skip-multi-page", action="store_true")
arg_parser.add_argument("--skip-single-page", action="store_true")
arg_parser.add_argument("--skip-amp", action="store_true")
arg_parser.add_argument("--skip-website", action="store_true")
arg_parser.add_argument("--skip-blog", action="store_true")
arg_parser.add_argument("--skip-git-log", action="store_true")
arg_parser.add_argument("--skip-docs", action="store_true")
arg_parser.add_argument("--test-only", action="store_true")
arg_parser.add_argument("--minify", action="store_true")
arg_parser.add_argument("--htmlproofer", action="store_true")
arg_parser.add_argument("--no-docs-macros", action="store_true")
arg_parser.add_argument("--save-raw-single-page", type=str)
arg_parser.add_argument("--livereload", type=int, default="0")
arg_parser.add_argument("--verbose", action="store_true")
args = arg_parser.parse_args()
args.minify = False # TODO remove
logging.basicConfig(
level=logging.DEBUG if args.verbose else logging.INFO,
stream=sys.stderr
level=logging.DEBUG if args.verbose else logging.INFO, stream=sys.stderr
)
logging.getLogger('MARKDOWN').setLevel(logging.INFO)
logging.getLogger("MARKDOWN").setLevel(logging.INFO)
args.docs_output_dir = os.path.join(os.path.abspath(args.output_dir), 'docs')
args.blog_output_dir = os.path.join(os.path.abspath(args.output_dir), 'blog')
args.docs_output_dir = os.path.join(os.path.abspath(args.output_dir), "docs")
args.blog_output_dir = os.path.join(os.path.abspath(args.output_dir), "blog")
from github import get_events
args.rev = subprocess.check_output('git rev-parse HEAD', shell=True).decode('utf-8').strip()
args.rev_short = subprocess.check_output('git rev-parse --short HEAD', shell=True).decode('utf-8').strip()
args.rev_url = f'https://github.com/ClickHouse/ClickHouse/commit/{args.rev}'
args.rev = (
subprocess.check_output("git rev-parse HEAD", shell=True)
.decode("utf-8")
.strip()
)
args.rev_short = (
subprocess.check_output("git rev-parse --short HEAD", shell=True)
.decode("utf-8")
.strip()
)
args.rev_url = f"https://github.com/ClickHouse/ClickHouse/commit/{args.rev}"
args.events = get_events(args)
if args.test_only:
@ -233,18 +248,20 @@ if __name__ == '__main__':
mdx_clickhouse.PatchedMacrosPlugin.skip_git_log = True
from build import build
build(args)
if args.livereload:
new_args = [arg for arg in sys.argv if not arg.startswith('--livereload')]
new_args = sys.executable + ' ' + ' '.join(new_args)
new_args = [arg for arg in sys.argv if not arg.startswith("--livereload")]
new_args = sys.executable + " " + " ".join(new_args)
server = livereload.Server()
server.watch(args.docs_dir + '**/*', livereload.shell(new_args, cwd='tools', shell=True))
server.watch(args.website_dir + '**/*', livereload.shell(new_args, cwd='tools', shell=True))
server.serve(
root=args.output_dir,
host='0.0.0.0',
port=args.livereload
server.watch(
args.docs_dir + "**/*", livereload.shell(new_args, cwd="tools", shell=True)
)
server.watch(
args.website_dir + "**/*",
livereload.shell(new_args, cwd="tools", shell=True),
)
server.serve(root=args.output_dir, host="0.0.0.0", port=args.livereload)
sys.exit(0)

View File

@ -6,11 +6,13 @@ from typing import TextIO, List, Tuple, Optional, Dict
Entity = Tuple[str, str, str]
# https://regex101.com/r/R6iogw/12
cmake_option_regex: str = r"^\s*option\s*\(([A-Z_0-9${}]+)\s*(?:\"((?:.|\n)*?)\")?\s*(.*)?\).*$"
cmake_option_regex: str = (
r"^\s*option\s*\(([A-Z_0-9${}]+)\s*(?:\"((?:.|\n)*?)\")?\s*(.*)?\).*$"
)
ch_master_url: str = "https://github.com/clickhouse/clickhouse/blob/master/"
name_str: str = "<a name=\"{anchor}\"></a>[`{name}`](" + ch_master_url + "{path}#L{line})"
name_str: str = '<a name="{anchor}"></a>[`{name}`](' + ch_master_url + "{path}#L{line})"
default_anchor_str: str = "[`{name}`](#{anchor})"
comment_var_regex: str = r"\${(.+)}"
@ -27,11 +29,15 @@ entities: Dict[str, Tuple[str, str]] = {}
def make_anchor(t: str) -> str:
return "".join(["-" if i == "_" else i.lower() for i in t if i.isalpha() or i == "_"])
return "".join(
["-" if i == "_" else i.lower() for i in t if i.isalpha() or i == "_"]
)
def process_comment(comment: str) -> str:
return re.sub(comment_var_regex, comment_var_replace, comment, flags=re.MULTILINE)
def build_entity(path: str, entity: Entity, line_comment: Tuple[int, str]) -> None:
(line, comment) = line_comment
(name, description, default) = entity
@ -47,22 +53,22 @@ def build_entity(path: str, entity: Entity, line_comment: Tuple[int, str]) -> No
formatted_default: str = "`" + default + "`"
formatted_name: str = name_str.format(
anchor=make_anchor(name),
name=name,
path=path,
line=line)
anchor=make_anchor(name), name=name, path=path, line=line
)
formatted_description: str = "".join(description.split("\n"))
formatted_comment: str = process_comment(comment)
formatted_entity: str = "| {} | {} | {} | {} |".format(
formatted_name, formatted_default, formatted_description, formatted_comment)
formatted_name, formatted_default, formatted_description, formatted_comment
)
entities[name] = path, formatted_entity
def process_file(root_path: str, file_path: str, file_name: str) -> None:
with open(os.path.join(file_path, file_name), 'r') as cmake_file:
with open(os.path.join(file_path, file_name), "r") as cmake_file:
contents: str = cmake_file.read()
def get_line_and_comment(target: str) -> Tuple[int, str]:
@ -70,10 +76,10 @@ def process_file(root_path: str, file_path: str, file_name: str) -> None:
comment: str = ""
for n, line in enumerate(contents_list):
if 'option' not in line.lower() or target not in line:
if "option" not in line.lower() or target not in line:
continue
for maybe_comment_line in contents_list[n - 1::-1]:
for maybe_comment_line in contents_list[n - 1 :: -1]:
if not re.match("\s*#\s*", maybe_comment_line):
break
@ -82,16 +88,22 @@ def process_file(root_path: str, file_path: str, file_name: str) -> None:
# line numbering starts with 1
return n + 1, comment
matches: Optional[List[Entity]] = re.findall(cmake_option_regex, contents, re.MULTILINE)
matches: Optional[List[Entity]] = re.findall(
cmake_option_regex, contents, re.MULTILINE
)
file_rel_path_with_name: str = os.path.join(file_path[len(root_path):], file_name)
if file_rel_path_with_name.startswith('/'):
file_rel_path_with_name: str = os.path.join(
file_path[len(root_path) :], file_name
)
if file_rel_path_with_name.startswith("/"):
file_rel_path_with_name = file_rel_path_with_name[1:]
if matches:
for entity in matches:
build_entity(file_rel_path_with_name, entity, get_line_and_comment(entity[0]))
build_entity(
file_rel_path_with_name, entity, get_line_and_comment(entity[0])
)
def process_folder(root_path: str, name: str) -> None:
for root, _, files in os.walk(os.path.join(root_path, name)):
@ -99,12 +111,19 @@ def process_folder(root_path: str, name: str) -> None:
if f == "CMakeLists.txt" or ".cmake" in f:
process_file(root_path, root, f)
def generate_cmake_flags_files() -> None:
root_path: str = os.path.join(os.path.dirname(__file__), '..', '..')
output_file_name: str = os.path.join(root_path, "docs/en/development/cmake-in-clickhouse.md")
header_file_name: str = os.path.join(root_path, "docs/_includes/cmake_in_clickhouse_header.md")
footer_file_name: str = os.path.join(root_path, "docs/_includes/cmake_in_clickhouse_footer.md")
def generate_cmake_flags_files() -> None:
root_path: str = os.path.join(os.path.dirname(__file__), "..", "..")
output_file_name: str = os.path.join(
root_path, "docs/en/development/cmake-in-clickhouse.md"
)
header_file_name: str = os.path.join(
root_path, "docs/_includes/cmake_in_clickhouse_header.md"
)
footer_file_name: str = os.path.join(
root_path, "docs/_includes/cmake_in_clickhouse_footer.md"
)
process_file(root_path, root_path, "CMakeLists.txt")
process_file(root_path, os.path.join(root_path, "programs"), "CMakeLists.txt")
@ -127,8 +146,10 @@ def generate_cmake_flags_files() -> None:
f.write(entities[k][1] + "\n")
ignored_keys.append(k)
f.write("\n### External libraries\nNote that ClickHouse uses forks of these libraries, see https://github.com/ClickHouse-Extras.\n" +
table_header)
f.write(
"\n### External libraries\nNote that ClickHouse uses forks of these libraries, see https://github.com/ClickHouse-Extras.\n"
+ table_header
)
for k in sorted_keys:
if k.startswith("ENABLE_") and ".cmake" in entities[k][0]:
@ -143,15 +164,18 @@ def generate_cmake_flags_files() -> None:
with open(footer_file_name, "r") as footer:
f.write(footer.read())
other_languages = ["docs/ja/development/cmake-in-clickhouse.md",
"docs/zh/development/cmake-in-clickhouse.md",
"docs/ru/development/cmake-in-clickhouse.md"]
other_languages = [
"docs/ja/development/cmake-in-clickhouse.md",
"docs/zh/development/cmake-in-clickhouse.md",
"docs/ru/development/cmake-in-clickhouse.md",
]
for lang in other_languages:
other_file_name = os.path.join(root_path, lang)
if os.path.exists(other_file_name):
os.unlink(other_file_name)
os.unlink(other_file_name)
os.symlink(output_file_name, other_file_name)
if __name__ == '__main__':
if __name__ == "__main__":
generate_cmake_flags_files()

View File

@ -8,7 +8,7 @@ import contextlib
from git import cmd
from tempfile import NamedTemporaryFile
SCRIPT_DESCRIPTION = '''
SCRIPT_DESCRIPTION = """
usage: ./easy_diff.py language/document path
Show the difference between a language document and an English document.
@ -53,16 +53,16 @@ SCRIPT_DESCRIPTION = '''
OPTIONS:
-h, --help show this help message and exit
--no-pager use stdout as difference result output
'''
"""
SCRIPT_PATH = os.path.abspath(__file__)
CLICKHOUSE_REPO_HOME = os.path.join(os.path.dirname(SCRIPT_PATH), '..', '..')
CLICKHOUSE_REPO_HOME = os.path.join(os.path.dirname(SCRIPT_PATH), "..", "..")
SCRIPT_COMMAND_EXECUTOR = cmd.Git(CLICKHOUSE_REPO_HOME)
SCRIPT_COMMAND_PARSER = argparse.ArgumentParser(add_help=False)
SCRIPT_COMMAND_PARSER.add_argument('path', type=bytes, nargs='?', default=None)
SCRIPT_COMMAND_PARSER.add_argument('--no-pager', action='store_true', default=False)
SCRIPT_COMMAND_PARSER.add_argument('-h', '--help', action='store_true', default=False)
SCRIPT_COMMAND_PARSER.add_argument("path", type=bytes, nargs="?", default=None)
SCRIPT_COMMAND_PARSER.add_argument("--no-pager", action="store_true", default=False)
SCRIPT_COMMAND_PARSER.add_argument("-h", "--help", action="store_true", default=False)
def execute(commands):
@ -70,19 +70,41 @@ def execute(commands):
def get_hash(file_name):
return execute(['git', 'log', '-n', '1', '--pretty=format:"%H"', file_name])
return execute(["git", "log", "-n", "1", '--pretty=format:"%H"', file_name])
def diff_file(reference_file, working_file, out):
if not os.path.exists(reference_file):
raise RuntimeError('reference file [' + os.path.abspath(reference_file) + '] is not exists.')
raise RuntimeError(
"reference file [" + os.path.abspath(reference_file) + "] is not exists."
)
if os.path.islink(working_file):
out.writelines(["Need translate document:" + os.path.abspath(reference_file)])
elif not os.path.exists(working_file):
out.writelines(['Need link document ' + os.path.abspath(reference_file) + ' to ' + os.path.abspath(working_file)])
out.writelines(
[
"Need link document "
+ os.path.abspath(reference_file)
+ " to "
+ os.path.abspath(working_file)
]
)
elif get_hash(working_file) != get_hash(reference_file):
out.writelines([(execute(['git', 'diff', get_hash(working_file).strip('"'), reference_file]).encode('utf-8'))])
out.writelines(
[
(
execute(
[
"git",
"diff",
get_hash(working_file).strip('"'),
reference_file,
]
).encode("utf-8")
)
]
)
return 0
@ -94,20 +116,30 @@ def diff_directory(reference_directory, working_directory, out):
for list_item in os.listdir(reference_directory):
working_item = os.path.join(working_directory, list_item)
reference_item = os.path.join(reference_directory, list_item)
if diff_file(reference_item, working_item, out) if os.path.isfile(reference_item) else diff_directory(reference_item, working_item, out) != 0:
if (
diff_file(reference_item, working_item, out)
if os.path.isfile(reference_item)
else diff_directory(reference_item, working_item, out) != 0
):
return 1
return 0
def find_language_doc(custom_document, other_language='en', children=[]):
def find_language_doc(custom_document, other_language="en", children=[]):
if len(custom_document) == 0:
raise RuntimeError('The ' + os.path.join(custom_document, *children) + " is not in docs directory.")
raise RuntimeError(
"The "
+ os.path.join(custom_document, *children)
+ " is not in docs directory."
)
if os.path.samefile(os.path.join(CLICKHOUSE_REPO_HOME, 'docs'), custom_document):
return os.path.join(CLICKHOUSE_REPO_HOME, 'docs', other_language, *children[1:])
if os.path.samefile(os.path.join(CLICKHOUSE_REPO_HOME, "docs"), custom_document):
return os.path.join(CLICKHOUSE_REPO_HOME, "docs", other_language, *children[1:])
children.insert(0, os.path.split(custom_document)[1])
return find_language_doc(os.path.split(custom_document)[0], other_language, children)
return find_language_doc(
os.path.split(custom_document)[0], other_language, children
)
class ToPager:
@ -119,7 +151,7 @@ class ToPager:
def close(self):
self.temp_named_file.flush()
git_pager = execute(['git', 'var', 'GIT_PAGER'])
git_pager = execute(["git", "var", "GIT_PAGER"])
subprocess.check_call([git_pager, self.temp_named_file.name])
self.temp_named_file.close()
@ -135,12 +167,20 @@ class ToStdOut:
self.system_stdout_stream = system_stdout_stream
if __name__ == '__main__':
if __name__ == "__main__":
arguments = SCRIPT_COMMAND_PARSER.parse_args()
if arguments.help or not arguments.path:
sys.stdout.write(SCRIPT_DESCRIPTION)
sys.exit(0)
working_language = os.path.join(CLICKHOUSE_REPO_HOME, 'docs', arguments.path)
with contextlib.closing(ToStdOut(sys.stdout) if arguments.no_pager else ToPager(NamedTemporaryFile('r+'))) as writer:
exit(diff_directory(find_language_doc(working_language), working_language, writer))
working_language = os.path.join(CLICKHOUSE_REPO_HOME, "docs", arguments.path)
with contextlib.closing(
ToStdOut(sys.stdout)
if arguments.no_pager
else ToPager(NamedTemporaryFile("r+"))
) as writer:
exit(
diff_directory(
find_language_doc(working_language), working_language, writer
)
)

View File

@ -16,27 +16,26 @@ import util
def get_events(args):
events = []
skip = True
with open(os.path.join(args.docs_dir, '..', 'README.md')) as f:
with open(os.path.join(args.docs_dir, "..", "README.md")) as f:
for line in f:
if skip:
if 'Upcoming Events' in line:
if "Upcoming Events" in line:
skip = False
else:
if not line:
continue
line = line.strip().split('](')
line = line.strip().split("](")
if len(line) == 2:
tail = line[1].split(') ')
events.append({
'signup_link': tail[0],
'event_name': line[0].replace('* [', ''),
'event_date': tail[1].replace('on ', '').replace('.', '')
})
tail = line[1].split(") ")
events.append(
{
"signup_link": tail[0],
"event_name": line[0].replace("* [", ""),
"event_date": tail[1].replace("on ", "").replace(".", ""),
}
)
return events
if __name__ == '__main__':
logging.basicConfig(
level=logging.DEBUG,
stream=sys.stderr
)
if __name__ == "__main__":
logging.basicConfig(level=logging.DEBUG, stream=sys.stderr)

View File

@ -16,74 +16,79 @@ import slugify as slugify_impl
def slugify(value, separator):
return slugify_impl.slugify(value, separator=separator, word_boundary=True, save_order=True)
return slugify_impl.slugify(
value, separator=separator, word_boundary=True, save_order=True
)
MARKDOWN_EXTENSIONS = [
'mdx_clickhouse',
'admonition',
'attr_list',
'def_list',
'codehilite',
'nl2br',
'sane_lists',
'pymdownx.details',
'pymdownx.magiclink',
'pymdownx.superfences',
'extra',
{
'toc': {
'permalink': True,
'slugify': slugify
}
}
"mdx_clickhouse",
"admonition",
"attr_list",
"def_list",
"codehilite",
"nl2br",
"sane_lists",
"pymdownx.details",
"pymdownx.magiclink",
"pymdownx.superfences",
"extra",
{"toc": {"permalink": True, "slugify": slugify}},
]
class ClickHouseLinkMixin(object):
def handleMatch(self, m, data):
single_page = (os.environ.get('SINGLE_PAGE') == '1')
single_page = os.environ.get("SINGLE_PAGE") == "1"
try:
el, start, end = super(ClickHouseLinkMixin, self).handleMatch(m, data)
except IndexError:
return
if el is not None:
href = el.get('href') or ''
is_external = href.startswith('http:') or href.startswith('https:')
href = el.get("href") or ""
is_external = href.startswith("http:") or href.startswith("https:")
if is_external:
if not href.startswith('https://clickhouse.com'):
el.set('rel', 'external nofollow noreferrer')
if not href.startswith("https://clickhouse.com"):
el.set("rel", "external nofollow noreferrer")
elif single_page:
if '#' in href:
el.set('href', '#' + href.split('#', 1)[1])
if "#" in href:
el.set("href", "#" + href.split("#", 1)[1])
else:
el.set('href', '#' + href.replace('/index.md', '/').replace('.md', '/'))
el.set(
"href", "#" + href.replace("/index.md", "/").replace(".md", "/")
)
return el, start, end
class ClickHouseAutolinkPattern(ClickHouseLinkMixin, markdown.inlinepatterns.AutolinkInlineProcessor):
class ClickHouseAutolinkPattern(
ClickHouseLinkMixin, markdown.inlinepatterns.AutolinkInlineProcessor
):
pass
class ClickHouseLinkPattern(ClickHouseLinkMixin, markdown.inlinepatterns.LinkInlineProcessor):
class ClickHouseLinkPattern(
ClickHouseLinkMixin, markdown.inlinepatterns.LinkInlineProcessor
):
pass
class ClickHousePreprocessor(markdown.util.Processor):
def run(self, lines):
for line in lines:
if '<!--hide-->' not in line:
if "<!--hide-->" not in line:
yield line
class ClickHouseMarkdown(markdown.extensions.Extension):
def extendMarkdown(self, md, md_globals):
md.preprocessors['clickhouse'] = ClickHousePreprocessor()
md.inlinePatterns['link'] = ClickHouseLinkPattern(markdown.inlinepatterns.LINK_RE, md)
md.inlinePatterns['autolink'] = ClickHouseAutolinkPattern(markdown.inlinepatterns.AUTOLINK_RE, md)
md.preprocessors["clickhouse"] = ClickHousePreprocessor()
md.inlinePatterns["link"] = ClickHouseLinkPattern(
markdown.inlinepatterns.LINK_RE, md
)
md.inlinePatterns["autolink"] = ClickHouseAutolinkPattern(
markdown.inlinepatterns.AUTOLINK_RE, md
)
def makeExtension(**kwargs):
@ -92,10 +97,8 @@ def makeExtension(**kwargs):
def get_translations(dirname, lang):
import babel.support
return babel.support.Translations.load(
dirname=dirname,
locales=[lang, 'en']
)
return babel.support.Translations.load(dirname=dirname, locales=[lang, "en"])
class PatchedMacrosPlugin(macros.plugin.MacrosPlugin):
@ -104,22 +107,22 @@ class PatchedMacrosPlugin(macros.plugin.MacrosPlugin):
def on_config(self, config):
super(PatchedMacrosPlugin, self).on_config(config)
self.env.comment_start_string = '{##'
self.env.comment_end_string = '##}'
self.env.loader = jinja2.FileSystemLoader([
os.path.join(config.data['site_dir']),
os.path.join(config.data['extra']['includes_dir'])
])
self.env.comment_start_string = "{##"
self.env.comment_end_string = "##}"
self.env.loader = jinja2.FileSystemLoader(
[
os.path.join(config.data["site_dir"]),
os.path.join(config.data["extra"]["includes_dir"]),
]
)
def on_env(self, env, config, files):
import util
env.add_extension('jinja2.ext.i18n')
dirname = os.path.join(config.data['theme'].dirs[0], 'locale')
lang = config.data['theme']['language']
env.install_gettext_translations(
get_translations(dirname, lang),
newstyle=True
)
env.add_extension("jinja2.ext.i18n")
dirname = os.path.join(config.data["theme"].dirs[0], "locale")
lang = config.data["theme"]["language"]
env.install_gettext_translations(get_translations(dirname, lang), newstyle=True)
util.init_jinja2_filters(env)
return env
@ -130,13 +133,17 @@ class PatchedMacrosPlugin(macros.plugin.MacrosPlugin):
return markdown
def on_page_markdown(self, markdown, page, config, files):
markdown = super(PatchedMacrosPlugin, self).on_page_markdown(markdown, page, config, files)
markdown = super(PatchedMacrosPlugin, self).on_page_markdown(
markdown, page, config, files
)
if os.path.islink(page.file.abs_src_path):
lang = config.data['theme']['language']
page.canonical_url = page.canonical_url.replace(f'/{lang}/', '/en/', 1)
lang = config.data["theme"]["language"]
page.canonical_url = page.canonical_url.replace(f"/{lang}/", "/en/", 1)
if config.data['extra'].get('version_prefix') or config.data['extra'].get('single_page'):
if config.data["extra"].get("version_prefix") or config.data["extra"].get(
"single_page"
):
return markdown
if self.skip_git_log:
return markdown

View File

@ -10,57 +10,59 @@ import util
def find_first_header(content):
for line in content.split('\n'):
if line.startswith('#'):
no_hash = line.lstrip('#')
return no_hash.split('{', 1)[0].strip()
for line in content.split("\n"):
if line.startswith("#"):
no_hash = line.lstrip("#")
return no_hash.split("{", 1)[0].strip()
def build_nav_entry(root, args):
if root.endswith('images'):
if root.endswith("images"):
return None, None, None
result_items = []
index_meta, index_content = util.read_md_file(os.path.join(root, 'index.md'))
current_title = index_meta.get('toc_folder_title', index_meta.get('toc_title'))
current_title = current_title or index_meta.get('title', find_first_header(index_content))
index_meta, index_content = util.read_md_file(os.path.join(root, "index.md"))
current_title = index_meta.get("toc_folder_title", index_meta.get("toc_title"))
current_title = current_title or index_meta.get(
"title", find_first_header(index_content)
)
for filename in os.listdir(root):
path = os.path.join(root, filename)
if os.path.isdir(path):
prio, title, payload = build_nav_entry(path, args)
if title and payload:
result_items.append((prio, title, payload))
elif filename.endswith('.md'):
elif filename.endswith(".md"):
path = os.path.join(root, filename)
meta = ''
content = ''
meta = ""
content = ""
try:
meta, content = util.read_md_file(path)
except:
print('Error in file: {}'.format(path))
print("Error in file: {}".format(path))
raise
path = path.split('/', 2)[-1]
title = meta.get('toc_title', find_first_header(content))
path = path.split("/", 2)[-1]
title = meta.get("toc_title", find_first_header(content))
if title:
title = title.strip().rstrip('.')
title = title.strip().rstrip(".")
else:
title = meta.get('toc_folder_title', 'hidden')
prio = meta.get('toc_priority', 9999)
logging.debug(f'Nav entry: {prio}, {title}, {path}')
if meta.get('toc_hidden') or not content.strip():
title = 'hidden'
if title == 'hidden':
title = 'hidden-' + hashlib.sha1(content.encode('utf-8')).hexdigest()
title = meta.get("toc_folder_title", "hidden")
prio = meta.get("toc_priority", 9999)
logging.debug(f"Nav entry: {prio}, {title}, {path}")
if meta.get("toc_hidden") or not content.strip():
title = "hidden"
if title == "hidden":
title = "hidden-" + hashlib.sha1(content.encode("utf-8")).hexdigest()
if args.nav_limit and len(result_items) >= args.nav_limit:
break
result_items.append((prio, title, path))
result_items = sorted(result_items, key=lambda x: (x[0], x[1]))
result = collections.OrderedDict([(item[1], item[2]) for item in result_items])
if index_meta.get('toc_hidden_folder'):
current_title += '|hidden-folder'
return index_meta.get('toc_priority', 10000), current_title, result
if index_meta.get("toc_hidden_folder"):
current_title += "|hidden-folder"
return index_meta.get("toc_priority", 10000), current_title, result
def build_docs_nav(lang, args):
@ -70,7 +72,7 @@ def build_docs_nav(lang, args):
index_key = None
for key, value in list(nav.items()):
if key and value:
if value == 'index.md':
if value == "index.md":
index_key = key
continue
result.append({key: value})
@ -78,7 +80,7 @@ def build_docs_nav(lang, args):
break
if index_key:
key = list(result[0].keys())[0]
result[0][key][index_key] = 'index.md'
result[0][key][index_key] = "index.md"
result[0][key].move_to_end(index_key, last=False)
return result
@ -86,7 +88,7 @@ def build_docs_nav(lang, args):
def build_blog_nav(lang, args):
blog_dir = os.path.join(args.blog_dir, lang)
years = sorted(os.listdir(blog_dir), reverse=True)
result_nav = [{'hidden': 'index.md'}]
result_nav = [{"hidden": "index.md"}]
post_meta = collections.OrderedDict()
for year in years:
year_dir = os.path.join(blog_dir, year)
@ -97,38 +99,53 @@ def build_blog_nav(lang, args):
post_meta_items = []
for post in os.listdir(year_dir):
post_path = os.path.join(year_dir, post)
if not post.endswith('.md'):
raise RuntimeError(f'Unexpected non-md file in posts folder: {post_path}')
if not post.endswith(".md"):
raise RuntimeError(
f"Unexpected non-md file in posts folder: {post_path}"
)
meta, _ = util.read_md_file(post_path)
post_date = meta['date']
post_title = meta['title']
post_date = meta["date"]
post_title = meta["title"]
if datetime.date.fromisoformat(post_date) > datetime.date.today():
continue
posts.append(
(post_date, post_title, os.path.join(year, post),)
(
post_date,
post_title,
os.path.join(year, post),
)
)
if post_title in post_meta:
raise RuntimeError(f'Duplicate post title: {post_title}')
if not post_date.startswith(f'{year}-'):
raise RuntimeError(f'Post date {post_date} doesn\'t match the folder year {year}: {post_title}')
post_url_part = post.replace('.md', '')
post_meta_items.append((post_date, {
'date': post_date,
'title': post_title,
'image': meta.get('image'),
'url': f'/blog/{lang}/{year}/{post_url_part}/'
},))
raise RuntimeError(f"Duplicate post title: {post_title}")
if not post_date.startswith(f"{year}-"):
raise RuntimeError(
f"Post date {post_date} doesn't match the folder year {year}: {post_title}"
)
post_url_part = post.replace(".md", "")
post_meta_items.append(
(
post_date,
{
"date": post_date,
"title": post_title,
"image": meta.get("image"),
"url": f"/blog/{lang}/{year}/{post_url_part}/",
},
)
)
for _, title, path in sorted(posts, reverse=True):
result_nav[-1][year][title] = path
for _, post_meta_item in sorted(post_meta_items,
reverse=True,
key=lambda item: item[0]):
post_meta[post_meta_item['title']] = post_meta_item
for _, post_meta_item in sorted(
post_meta_items, reverse=True, key=lambda item: item[0]
):
post_meta[post_meta_item["title"]] = post_meta_item
return result_nav, post_meta
def _custom_get_navigation(files, config):
nav_config = config['nav'] or mkdocs.structure.nav.nest_paths(f.src_path for f in files.documentation_pages())
nav_config = config["nav"] or mkdocs.structure.nav.nest_paths(
f.src_path for f in files.documentation_pages()
)
items = mkdocs.structure.nav._data_to_navigation(nav_config, files, config)
if not isinstance(items, list):
items = [items]
@ -138,19 +155,25 @@ def _custom_get_navigation(files, config):
mkdocs.structure.nav._add_previous_and_next_links(pages)
mkdocs.structure.nav._add_parent_links(items)
missing_from_config = [file for file in files.documentation_pages() if file.page is None]
missing_from_config = [
file for file in files.documentation_pages() if file.page is None
]
if missing_from_config:
files._files = [file for file in files._files if file not in missing_from_config]
files._files = [
file for file in files._files if file not in missing_from_config
]
links = mkdocs.structure.nav._get_by_type(items, mkdocs.structure.nav.Link)
for link in links:
scheme, netloc, path, params, query, fragment = mkdocs.structure.nav.urlparse(link.url)
scheme, netloc, path, params, query, fragment = mkdocs.structure.nav.urlparse(
link.url
)
if scheme or netloc:
mkdocs.structure.nav.log.debug(
"An external link to '{}' is included in "
"the 'nav' configuration.".format(link.url)
)
elif link.url.startswith('/'):
elif link.url.startswith("/"):
mkdocs.structure.nav.log.debug(
"An absolute path to '{}' is included in the 'nav' configuration, "
"which presumably points to an external resource.".format(link.url)

View File

@ -7,8 +7,9 @@ def write_redirect_html(out_path, to_url):
os.makedirs(out_dir)
except OSError:
pass
with open(out_path, 'w') as f:
f.write(f'''<!--[if IE 6]> Redirect: {to_url} <![endif]-->
with open(out_path, "w") as f:
f.write(
f"""<!--[if IE 6]> Redirect: {to_url} <![endif]-->
<!DOCTYPE HTML>
<html lang="en-US">
<head>
@ -22,18 +23,20 @@ def write_redirect_html(out_path, to_url):
<body>
If you are not redirected automatically, follow this <a href="{to_url}">link</a>.
</body>
</html>''')
</html>"""
)
def build_redirect_html(args, base_prefix, lang, output_dir, from_path, to_path):
out_path = os.path.join(
output_dir, lang,
from_path.replace('/index.md', '/index.html').replace('.md', '/index.html')
output_dir,
lang,
from_path.replace("/index.md", "/index.html").replace(".md", "/index.html"),
)
target_path = to_path.replace('/index.md', '/').replace('.md', '/')
target_path = to_path.replace("/index.md", "/").replace(".md", "/")
if target_path[0:7] != 'http://' and target_path[0:8] != 'https://':
to_url = f'/{base_prefix}/{lang}/{target_path}'
if target_path[0:7] != "http://" and target_path[0:8] != "https://":
to_url = f"/{base_prefix}/{lang}/{target_path}"
else:
to_url = target_path
@ -42,33 +45,48 @@ def build_redirect_html(args, base_prefix, lang, output_dir, from_path, to_path)
def build_docs_redirects(args):
with open(os.path.join(args.docs_dir, 'redirects.txt'), 'r') as f:
with open(os.path.join(args.docs_dir, "redirects.txt"), "r") as f:
for line in f:
for lang in args.lang.split(','):
from_path, to_path = line.split(' ', 1)
build_redirect_html(args, 'docs', lang, args.docs_output_dir, from_path, to_path)
for lang in args.lang.split(","):
from_path, to_path = line.split(" ", 1)
build_redirect_html(
args, "docs", lang, args.docs_output_dir, from_path, to_path
)
def build_blog_redirects(args):
for lang in args.blog_lang.split(','):
redirects_path = os.path.join(args.blog_dir, lang, 'redirects.txt')
for lang in args.blog_lang.split(","):
redirects_path = os.path.join(args.blog_dir, lang, "redirects.txt")
if os.path.exists(redirects_path):
with open(redirects_path, 'r') as f:
with open(redirects_path, "r") as f:
for line in f:
from_path, to_path = line.split(' ', 1)
build_redirect_html(args, 'blog', lang, args.blog_output_dir, from_path, to_path)
from_path, to_path = line.split(" ", 1)
build_redirect_html(
args, "blog", lang, args.blog_output_dir, from_path, to_path
)
def build_static_redirects(args):
for static_redirect in [
('benchmark.html', '/benchmark/dbms/'),
('benchmark_hardware.html', '/benchmark/hardware/'),
('tutorial.html', '/docs/en/getting_started/tutorial/',),
('reference_en.html', '/docs/en/single/', ),
('reference_ru.html', '/docs/ru/single/',),
('docs/index.html', '/docs/en/',),
("benchmark.html", "/benchmark/dbms/"),
("benchmark_hardware.html", "/benchmark/hardware/"),
(
"tutorial.html",
"/docs/en/getting_started/tutorial/",
),
(
"reference_en.html",
"/docs/en/single/",
),
(
"reference_ru.html",
"/docs/ru/single/",
),
(
"docs/index.html",
"/docs/en/",
),
]:
write_redirect_html(
os.path.join(args.output_dir, static_redirect[0]),
static_redirect[1]
os.path.join(args.output_dir, static_redirect[0]), static_redirect[1]
)

View File

@ -12,7 +12,8 @@ import test
import util
import website
TEMPORARY_FILE_NAME = 'single.md'
TEMPORARY_FILE_NAME = "single.md"
def recursive_values(item):
if isinstance(item, dict):
@ -25,11 +26,14 @@ def recursive_values(item):
yield item
anchor_not_allowed_chars = re.compile(r'[^\w\-]')
def generate_anchor_from_path(path):
return re.sub(anchor_not_allowed_chars, '-', path)
anchor_not_allowed_chars = re.compile(r"[^\w\-]")
absolute_link = re.compile(r'^https?://')
def generate_anchor_from_path(path):
return re.sub(anchor_not_allowed_chars, "-", path)
absolute_link = re.compile(r"^https?://")
def replace_link(match, path):
@ -40,46 +44,55 @@ def replace_link(match, path):
if re.search(absolute_link, link):
return match.group(0)
if link.endswith('/'):
link = link[0:-1] + '.md'
if link.endswith("/"):
link = link[0:-1] + ".md"
return '{}(#{})'.format(title, generate_anchor_from_path(os.path.normpath(os.path.join(os.path.dirname(path), link))))
return "{}(#{})".format(
title,
generate_anchor_from_path(
os.path.normpath(os.path.join(os.path.dirname(path), link))
),
)
# Concatenates Markdown files to a single file.
def concatenate(lang, docs_path, single_page_file, nav):
lang_path = os.path.join(docs_path, lang)
proj_config = f'{docs_path}/toc_{lang}.yml'
proj_config = f"{docs_path}/toc_{lang}.yml"
if os.path.exists(proj_config):
with open(proj_config) as cfg_file:
nav = yaml.full_load(cfg_file.read())['nav']
nav = yaml.full_load(cfg_file.read())["nav"]
files_to_concatenate = list(recursive_values(nav))
files_count = len(files_to_concatenate)
logging.info(f'{files_count} files will be concatenated into single md-file for {lang}.')
logging.debug('Concatenating: ' + ', '.join(files_to_concatenate))
assert files_count > 0, f'Empty single-page for {lang}'
logging.info(
f"{files_count} files will be concatenated into single md-file for {lang}."
)
logging.debug("Concatenating: " + ", ".join(files_to_concatenate))
assert files_count > 0, f"Empty single-page for {lang}"
link_regexp = re.compile(r'(\[[^\]]+\])\(([^)#]+)(?:#[^\)]+)?\)')
link_regexp = re.compile(r"(\[[^\]]+\])\(([^)#]+)(?:#[^\)]+)?\)")
for path in files_to_concatenate:
try:
with open(os.path.join(lang_path, path)) as f:
# Insert a horizontal ruler. Then insert an anchor that we will link to. Its name will be a path to the .md file.
single_page_file.write('\n______\n<a name="%s"></a>\n' % generate_anchor_from_path(path))
single_page_file.write(
'\n______\n<a name="%s"></a>\n' % generate_anchor_from_path(path)
)
in_metadata = False
for line in f:
# Skip YAML metadata.
if line == '---\n':
if line == "---\n":
in_metadata = not in_metadata
continue
if not in_metadata:
# Increase the level of headers.
if line.startswith('#'):
line = '#' + line
if line.startswith("#"):
line = "#" + line
# Replace links within the docs.
@ -87,14 +100,19 @@ def concatenate(lang, docs_path, single_page_file, nav):
line = re.sub(
link_regexp,
lambda match: replace_link(match, path),
line)
line,
)
# If failed to replace the relative link, print to log
# But with some exceptions:
# - "../src/" -- for cmake-in-clickhouse.md (link to sources)
# - "../usr/share" -- changelog entry that has "../usr/share/zoneinfo"
if '../' in line and (not '../usr/share' in line) and (not '../src/' in line):
logging.info('Failed to resolve relative link:')
if (
"../" in line
and (not "../usr/share" in line)
and (not "../src/" in line)
):
logging.info("Failed to resolve relative link:")
logging.info(path)
logging.info(line)
@ -105,9 +123,11 @@ def concatenate(lang, docs_path, single_page_file, nav):
single_page_file.flush()
def get_temporary_file_name(lang, args):
return os.path.join(args.docs_dir, lang, TEMPORARY_FILE_NAME)
def remove_temporary_files(lang, args):
single_md_path = get_temporary_file_name(lang, args)
if os.path.exists(single_md_path):
@ -115,14 +135,14 @@ def remove_temporary_files(lang, args):
def build_single_page_version(lang, args, nav, cfg):
logging.info(f'Building single page version for {lang}')
os.environ['SINGLE_PAGE'] = '1'
extra = cfg.data['extra']
extra['single_page'] = True
extra['is_amp'] = False
logging.info(f"Building single page version for {lang}")
os.environ["SINGLE_PAGE"] = "1"
extra = cfg.data["extra"]
extra["single_page"] = True
extra["is_amp"] = False
single_md_path = get_temporary_file_name(lang, args)
with open(single_md_path, 'w') as single_md:
with open(single_md_path, "w") as single_md:
concatenate(lang, args.docs_dir, single_md, nav)
with util.temp_dir() as site_temp:
@ -132,72 +152,83 @@ def build_single_page_version(lang, args, nav, cfg):
shutil.copytree(docs_src_lang, docs_temp_lang)
for root, _, filenames in os.walk(docs_temp_lang):
for filename in filenames:
if filename != 'single.md' and filename.endswith('.md'):
if filename != "single.md" and filename.endswith(".md"):
os.unlink(os.path.join(root, filename))
cfg.load_dict({
'docs_dir': docs_temp_lang,
'site_dir': site_temp,
'extra': extra,
'nav': [
{cfg.data.get('site_name'): 'single.md'}
]
})
cfg.load_dict(
{
"docs_dir": docs_temp_lang,
"site_dir": site_temp,
"extra": extra,
"nav": [{cfg.data.get("site_name"): "single.md"}],
}
)
if not args.test_only:
mkdocs.commands.build.build(cfg)
single_page_output_path = os.path.join(args.docs_dir, args.docs_output_dir, lang, 'single')
single_page_output_path = os.path.join(
args.docs_dir, args.docs_output_dir, lang, "single"
)
if os.path.exists(single_page_output_path):
shutil.rmtree(single_page_output_path)
shutil.copytree(
os.path.join(site_temp, 'single'),
single_page_output_path
os.path.join(site_temp, "single"), single_page_output_path
)
single_page_index_html = os.path.join(single_page_output_path, 'index.html')
single_page_content_js = os.path.join(single_page_output_path, 'content.js')
single_page_index_html = os.path.join(
single_page_output_path, "index.html"
)
single_page_content_js = os.path.join(
single_page_output_path, "content.js"
)
with open(single_page_index_html, 'r') as f:
sp_prefix, sp_js, sp_suffix = f.read().split('<!-- BREAK -->')
with open(single_page_index_html, "r") as f:
sp_prefix, sp_js, sp_suffix = f.read().split("<!-- BREAK -->")
with open(single_page_index_html, 'w') as f:
with open(single_page_index_html, "w") as f:
f.write(sp_prefix)
f.write(sp_suffix)
with open(single_page_content_js, 'w') as f:
with open(single_page_content_js, "w") as f:
if args.minify:
import jsmin
sp_js = jsmin.jsmin(sp_js)
f.write(sp_js)
logging.info(f'Re-building single page for {lang} pdf/test')
logging.info(f"Re-building single page for {lang} pdf/test")
with util.temp_dir() as test_dir:
extra['single_page'] = False
cfg.load_dict({
'docs_dir': docs_temp_lang,
'site_dir': test_dir,
'extra': extra,
'nav': [
{cfg.data.get('site_name'): 'single.md'}
]
})
extra["single_page"] = False
cfg.load_dict(
{
"docs_dir": docs_temp_lang,
"site_dir": test_dir,
"extra": extra,
"nav": [{cfg.data.get("site_name"): "single.md"}],
}
)
mkdocs.commands.build.build(cfg)
css_in = ' '.join(website.get_css_in(args))
js_in = ' '.join(website.get_js_in(args))
subprocess.check_call(f'cat {css_in} > {test_dir}/css/base.css', shell=True)
subprocess.check_call(f'cat {js_in} > {test_dir}/js/base.js', shell=True)
css_in = " ".join(website.get_css_in(args))
js_in = " ".join(website.get_js_in(args))
subprocess.check_call(
f"cat {css_in} > {test_dir}/css/base.css", shell=True
)
subprocess.check_call(
f"cat {js_in} > {test_dir}/js/base.js", shell=True
)
if args.save_raw_single_page:
shutil.copytree(test_dir, args.save_raw_single_page)
logging.info(f'Running tests for {lang}')
logging.info(f"Running tests for {lang}")
test.test_single_page(
os.path.join(test_dir, 'single', 'index.html'), lang)
os.path.join(test_dir, "single", "index.html"), lang
)
logging.info(f'Finished building single page version for {lang}')
logging.info(f"Finished building single page version for {lang}")
remove_temporary_files(lang, args)

View File

@ -8,14 +8,11 @@ import subprocess
def test_single_page(input_path, lang):
if not (lang == 'en'):
if not (lang == "en"):
return
with open(input_path) as f:
soup = bs4.BeautifulSoup(
f,
features='html.parser'
)
soup = bs4.BeautifulSoup(f, features="html.parser")
anchor_points = set()
@ -23,30 +20,27 @@ def test_single_page(input_path, lang):
links_to_nowhere = 0
for tag in soup.find_all():
for anchor_point in [tag.attrs.get('name'), tag.attrs.get('id')]:
for anchor_point in [tag.attrs.get("name"), tag.attrs.get("id")]:
if anchor_point:
anchor_points.add(anchor_point)
for tag in soup.find_all():
href = tag.attrs.get('href')
if href and href.startswith('#') and href != '#':
href = tag.attrs.get("href")
if href and href.startswith("#") and href != "#":
if href[1:] not in anchor_points:
links_to_nowhere += 1
logging.info("Tag %s", tag)
logging.info('Link to nowhere: %s' % href)
logging.info("Link to nowhere: %s" % href)
if links_to_nowhere:
logging.error(f'Found {links_to_nowhere} links to nowhere in {lang}')
logging.error(f"Found {links_to_nowhere} links to nowhere in {lang}")
sys.exit(1)
if len(anchor_points) <= 10:
logging.error('Html parsing is probably broken')
logging.error("Html parsing is probably broken")
sys.exit(1)
if __name__ == '__main__':
logging.basicConfig(
level=logging.DEBUG,
stream=sys.stderr
)
if __name__ == "__main__":
logging.basicConfig(level=logging.DEBUG, stream=sys.stderr)
test_single_page(sys.argv[1], sys.argv[2])

View File

@ -15,7 +15,7 @@ import yaml
@contextlib.contextmanager
def temp_dir():
path = tempfile.mkdtemp(dir=os.environ.get('TEMP'))
path = tempfile.mkdtemp(dir=os.environ.get("TEMP"))
try:
yield path
finally:
@ -34,7 +34,7 @@ def cd(new_cwd):
def get_free_port():
with contextlib.closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
s.bind(('', 0))
s.bind(("", 0))
s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
return s.getsockname()[1]
@ -61,12 +61,12 @@ def read_md_file(path):
meta_text = []
content = []
if os.path.exists(path):
with open(path, 'r') as f:
with open(path, "r") as f:
for line in f:
if line.startswith('---'):
if line.startswith("---"):
if in_meta:
in_meta = False
meta = yaml.full_load(''.join(meta_text))
meta = yaml.full_load("".join(meta_text))
else:
in_meta = True
else:
@ -74,7 +74,7 @@ def read_md_file(path):
meta_text.append(line)
else:
content.append(line)
return meta, ''.join(content)
return meta, "".join(content)
def write_md_file(path, meta, content):
@ -82,13 +82,13 @@ def write_md_file(path, meta, content):
if not os.path.exists(dirname):
os.makedirs(dirname)
with open(path, 'w') as f:
with open(path, "w") as f:
if meta:
print('---', file=f)
print("---", file=f)
yaml.dump(meta, f)
print('---', file=f)
if not content.startswith('\n'):
print('', file=f)
print("---", file=f)
if not content.startswith("\n"):
print("", file=f)
f.write(content)
@ -100,7 +100,7 @@ def represent_ordereddict(dumper, data):
value.append((node_key, node_value))
return yaml.nodes.MappingNode(u'tag:yaml.org,2002:map', value)
return yaml.nodes.MappingNode("tag:yaml.org,2002:map", value)
yaml.add_representer(collections.OrderedDict, represent_ordereddict)
@ -109,30 +109,31 @@ yaml.add_representer(collections.OrderedDict, represent_ordereddict)
def init_jinja2_filters(env):
import amp
import website
chunk_size = 10240
env.filters['chunks'] = lambda line: [line[i:i + chunk_size] for i in range(0, len(line), chunk_size)]
env.filters['html_to_amp'] = amp.html_to_amp
env.filters['adjust_markdown_html'] = website.adjust_markdown_html
env.filters['to_rfc882'] = lambda d: datetime.datetime.strptime(d, '%Y-%m-%d').strftime('%a, %d %b %Y %H:%M:%S GMT')
env.filters["chunks"] = lambda line: [
line[i : i + chunk_size] for i in range(0, len(line), chunk_size)
]
env.filters["html_to_amp"] = amp.html_to_amp
env.filters["adjust_markdown_html"] = website.adjust_markdown_html
env.filters["to_rfc882"] = lambda d: datetime.datetime.strptime(
d, "%Y-%m-%d"
).strftime("%a, %d %b %Y %H:%M:%S GMT")
def init_jinja2_env(args):
import mdx_clickhouse
env = jinja2.Environment(
loader=jinja2.FileSystemLoader([
args.website_dir,
os.path.join(args.docs_dir, '_includes')
]),
extensions=[
'jinja2.ext.i18n',
'jinja2_highlight.HighlightExtension'
]
loader=jinja2.FileSystemLoader(
[args.website_dir, os.path.join(args.docs_dir, "_includes")]
),
extensions=["jinja2.ext.i18n", "jinja2_highlight.HighlightExtension"],
)
env.extend(jinja2_highlight_cssclass='syntax p-3 my-3')
translations_dir = os.path.join(args.website_dir, 'locale')
env.extend(jinja2_highlight_cssclass="syntax p-3 my-3")
translations_dir = os.path.join(args.website_dir, "locale")
env.install_gettext_translations(
mdx_clickhouse.get_translations(translations_dir, 'en'),
newstyle=True
mdx_clickhouse.get_translations(translations_dir, "en"), newstyle=True
)
init_jinja2_filters(env)
return env

View File

@ -17,108 +17,112 @@ import util
def handle_iframe(iframe, soup):
allowed_domains = ['https://www.youtube.com/', 'https://datalens.yandex/']
allowed_domains = ["https://www.youtube.com/", "https://datalens.yandex/"]
illegal_domain = True
iframe_src = iframe.attrs['src']
iframe_src = iframe.attrs["src"]
for domain in allowed_domains:
if iframe_src.startswith(domain):
illegal_domain = False
break
if illegal_domain:
raise RuntimeError(f'iframe from illegal domain: {iframe_src}')
wrapper = soup.new_tag('div')
wrapper.attrs['class'] = ['embed-responsive', 'embed-responsive-16by9']
raise RuntimeError(f"iframe from illegal domain: {iframe_src}")
wrapper = soup.new_tag("div")
wrapper.attrs["class"] = ["embed-responsive", "embed-responsive-16by9"]
iframe.insert_before(wrapper)
iframe.extract()
wrapper.insert(0, iframe)
if 'width' in iframe.attrs:
del iframe.attrs['width']
if 'height' in iframe.attrs:
del iframe.attrs['height']
iframe.attrs['allow'] = 'accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture'
iframe.attrs['class'] = 'embed-responsive-item'
iframe.attrs['frameborder'] = '0'
iframe.attrs['allowfullscreen'] = '1'
if "width" in iframe.attrs:
del iframe.attrs["width"]
if "height" in iframe.attrs:
del iframe.attrs["height"]
iframe.attrs[
"allow"
] = "accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture"
iframe.attrs["class"] = "embed-responsive-item"
iframe.attrs["frameborder"] = "0"
iframe.attrs["allowfullscreen"] = "1"
def adjust_markdown_html(content):
soup = bs4.BeautifulSoup(
content,
features='html.parser'
)
soup = bs4.BeautifulSoup(content, features="html.parser")
for a in soup.find_all('a'):
a_class = a.attrs.get('class')
a_href = a.attrs.get('href')
if a_class and 'headerlink' in a_class:
a.string = '\xa0'
if a_href and a_href.startswith('http'):
a.attrs['target'] = '_blank'
for a in soup.find_all("a"):
a_class = a.attrs.get("class")
a_href = a.attrs.get("href")
if a_class and "headerlink" in a_class:
a.string = "\xa0"
if a_href and a_href.startswith("http"):
a.attrs["target"] = "_blank"
for code in soup.find_all('code'):
code_class = code.attrs.get('class')
for code in soup.find_all("code"):
code_class = code.attrs.get("class")
if code_class:
code.attrs['class'] = code_class + ['syntax']
code.attrs["class"] = code_class + ["syntax"]
else:
code.attrs['class'] = 'syntax'
code.attrs["class"] = "syntax"
for iframe in soup.find_all('iframe'):
for iframe in soup.find_all("iframe"):
handle_iframe(iframe, soup)
for img in soup.find_all('img'):
if img.attrs.get('alt') == 'iframe':
img.name = 'iframe'
img.string = ''
for img in soup.find_all("img"):
if img.attrs.get("alt") == "iframe":
img.name = "iframe"
img.string = ""
handle_iframe(img, soup)
continue
img_class = img.attrs.get('class')
img_class = img.attrs.get("class")
if img_class:
img.attrs['class'] = img_class + ['img-fluid']
img.attrs["class"] = img_class + ["img-fluid"]
else:
img.attrs['class'] = 'img-fluid'
img.attrs["class"] = "img-fluid"
for details in soup.find_all('details'):
for summary in details.find_all('summary'):
for details in soup.find_all("details"):
for summary in details.find_all("summary"):
if summary.parent != details:
summary.extract()
details.insert(0, summary)
for dd in soup.find_all('dd'):
dd_class = dd.attrs.get('class')
for dd in soup.find_all("dd"):
dd_class = dd.attrs.get("class")
if dd_class:
dd.attrs['class'] = dd_class + ['pl-3']
dd.attrs["class"] = dd_class + ["pl-3"]
else:
dd.attrs['class'] = 'pl-3'
dd.attrs["class"] = "pl-3"
for div in soup.find_all('div'):
div_class = div.attrs.get('class')
is_admonition = div_class and 'admonition' in div.attrs.get('class')
for div in soup.find_all("div"):
div_class = div.attrs.get("class")
is_admonition = div_class and "admonition" in div.attrs.get("class")
if is_admonition:
for a in div.find_all('a'):
a_class = a.attrs.get('class')
for a in div.find_all("a"):
a_class = a.attrs.get("class")
if a_class:
a.attrs['class'] = a_class + ['alert-link']
a.attrs["class"] = a_class + ["alert-link"]
else:
a.attrs['class'] = 'alert-link'
a.attrs["class"] = "alert-link"
for p in div.find_all('p'):
p_class = p.attrs.get('class')
if is_admonition and p_class and ('admonition-title' in p_class):
p.attrs['class'] = p_class + ['alert-heading', 'display-4', 'text-reset', 'mb-2']
for p in div.find_all("p"):
p_class = p.attrs.get("class")
if is_admonition and p_class and ("admonition-title" in p_class):
p.attrs["class"] = p_class + [
"alert-heading",
"display-4",
"text-reset",
"mb-2",
]
if is_admonition:
div.attrs['role'] = 'alert'
if ('info' in div_class) or ('note' in div_class):
mode = 'alert-primary'
elif ('attention' in div_class) or ('warning' in div_class):
mode = 'alert-warning'
elif 'important' in div_class:
mode = 'alert-danger'
elif 'tip' in div_class:
mode = 'alert-info'
div.attrs["role"] = "alert"
if ("info" in div_class) or ("note" in div_class):
mode = "alert-primary"
elif ("attention" in div_class) or ("warning" in div_class):
mode = "alert-warning"
elif "important" in div_class:
mode = "alert-danger"
elif "tip" in div_class:
mode = "alert-info"
else:
mode = 'alert-secondary'
div.attrs['class'] = div_class + ['alert', 'pb-0', 'mb-4', mode]
mode = "alert-secondary"
div.attrs["class"] = div_class + ["alert", "pb-0", "mb-4", mode]
return str(soup)
@ -128,61 +132,63 @@ def minify_html(content):
def build_website(args):
logging.info('Building website')
logging.info("Building website")
env = util.init_jinja2_env(args)
shutil.copytree(
args.website_dir,
args.output_dir,
ignore=shutil.ignore_patterns(
'*.md',
'*.sh',
'*.css',
'*.json',
'js/*.js',
'build',
'docs',
'public',
'node_modules',
'src',
'templates',
'locale',
'.gitkeep'
)
"*.md",
"*.sh",
"*.css",
"*.json",
"js/*.js",
"build",
"docs",
"public",
"node_modules",
"src",
"templates",
"locale",
".gitkeep",
),
)
shutil.copytree(
os.path.join(args.website_dir, 'images'),
os.path.join(args.output_dir, 'docs', 'images')
os.path.join(args.website_dir, "images"),
os.path.join(args.output_dir, "docs", "images"),
)
# This file can be requested to check for available ClickHouse releases.
shutil.copy2(
os.path.join(args.src_dir, 'utils', 'list-versions', 'version_date.tsv'),
os.path.join(args.output_dir, 'data', 'version_date.tsv'))
os.path.join(args.src_dir, "utils", "list-versions", "version_date.tsv"),
os.path.join(args.output_dir, "data", "version_date.tsv"),
)
# This file can be requested to install ClickHouse.
shutil.copy2(
os.path.join(args.src_dir, 'docs', '_includes', 'install', 'universal.sh'),
os.path.join(args.output_dir, 'data', 'install.sh'))
os.path.join(args.src_dir, "docs", "_includes", "install", "universal.sh"),
os.path.join(args.output_dir, "data", "install.sh"),
)
for root, _, filenames in os.walk(args.output_dir):
for filename in filenames:
if filename == 'main.html':
if filename == "main.html":
continue
path = os.path.join(root, filename)
if not filename.endswith('.html'):
if not filename.endswith(".html"):
continue
logging.info('Processing %s', path)
with open(path, 'rb') as f:
content = f.read().decode('utf-8')
logging.info("Processing %s", path)
with open(path, "rb") as f:
content = f.read().decode("utf-8")
template = env.from_string(content)
content = template.render(args.__dict__)
with open(path, 'wb') as f:
f.write(content.encode('utf-8'))
with open(path, "wb") as f:
f.write(content.encode("utf-8"))
def get_css_in(args):
@ -193,7 +199,7 @@ def get_css_in(args):
f"'{args.website_dir}/css/blog.css'",
f"'{args.website_dir}/css/docs.css'",
f"'{args.website_dir}/css/highlight.css'",
f"'{args.website_dir}/css/main.css'"
f"'{args.website_dir}/css/main.css'",
]
@ -207,42 +213,41 @@ def get_js_in(args):
f"'{args.website_dir}/js/index.js'",
f"'{args.website_dir}/js/docsearch.js'",
f"'{args.website_dir}/js/docs.js'",
f"'{args.website_dir}/js/main.js'"
f"'{args.website_dir}/js/main.js'",
]
def minify_file(path, css_digest, js_digest):
if not (
path.endswith('.html') or
path.endswith('.css')
):
if not (path.endswith(".html") or path.endswith(".css")):
return
logging.info('Minifying %s', path)
with open(path, 'rb') as f:
content = f.read().decode('utf-8')
if path.endswith('.html'):
logging.info("Minifying %s", path)
with open(path, "rb") as f:
content = f.read().decode("utf-8")
if path.endswith(".html"):
content = minify_html(content)
content = content.replace('base.css?css_digest', f'base.css?{css_digest}')
content = content.replace('base.js?js_digest', f'base.js?{js_digest}')
# TODO: restore cssmin
# elif path.endswith('.css'):
# content = cssmin.cssmin(content)
# TODO: restore jsmin
# elif path.endswith('.js'):
# content = jsmin.jsmin(content)
with open(path, 'wb') as f:
f.write(content.encode('utf-8'))
content = content.replace("base.css?css_digest", f"base.css?{css_digest}")
content = content.replace("base.js?js_digest", f"base.js?{js_digest}")
# TODO: restore cssmin
# elif path.endswith('.css'):
# content = cssmin.cssmin(content)
# TODO: restore jsmin
# elif path.endswith('.js'):
# content = jsmin.jsmin(content)
with open(path, "wb") as f:
f.write(content.encode("utf-8"))
def minify_website(args):
css_in = ' '.join(get_css_in(args))
css_out = f'{args.output_dir}/docs/css/base.css'
os.makedirs(f'{args.output_dir}/docs/css')
css_in = " ".join(get_css_in(args))
css_out = f"{args.output_dir}/docs/css/base.css"
os.makedirs(f"{args.output_dir}/docs/css")
if args.minify and False: # TODO: return closure
command = f"purifycss -w '*algolia*' --min {css_in} '{args.output_dir}/*.html' " \
command = (
f"purifycss -w '*algolia*' --min {css_in} '{args.output_dir}/*.html' "
f"'{args.output_dir}/docs/en/**/*.html' '{args.website_dir}/js/**/*.js' > {css_out}"
)
logging.info(css_in)
logging.info(command)
output = subprocess.check_output(command, shell=True)
@ -251,51 +256,60 @@ def minify_website(args):
else:
command = f"cat {css_in}"
output = subprocess.check_output(command, shell=True)
with open(css_out, 'wb+') as f:
with open(css_out, "wb+") as f:
f.write(output)
with open(css_out, 'rb') as f:
with open(css_out, "rb") as f:
css_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8]
js_in = ' '.join(get_js_in(args))
js_out = f'{args.output_dir}/docs/js/base.js'
os.makedirs(f'{args.output_dir}/docs/js')
js_in = " ".join(get_js_in(args))
js_out = f"{args.output_dir}/docs/js/base.js"
os.makedirs(f"{args.output_dir}/docs/js")
if args.minify and False: # TODO: return closure
js_in = [js[1:-1] for js in js_in]
closure_args = [
'--js', *js_in, '--js_output_file', js_out,
'--compilation_level', 'SIMPLE',
'--dependency_mode', 'NONE',
'--third_party', '--use_types_for_optimization',
'--isolation_mode', 'IIFE'
"--js",
*js_in,
"--js_output_file",
js_out,
"--compilation_level",
"SIMPLE",
"--dependency_mode",
"NONE",
"--third_party",
"--use_types_for_optimization",
"--isolation_mode",
"IIFE",
]
logging.info(closure_args)
if closure.run(*closure_args):
raise RuntimeError('failed to run closure compiler')
with open(js_out, 'r') as f:
raise RuntimeError("failed to run closure compiler")
with open(js_out, "r") as f:
js_content = jsmin.jsmin(f.read())
with open(js_out, 'w') as f:
with open(js_out, "w") as f:
f.write(js_content)
else:
command = f"cat {js_in}"
output = subprocess.check_output(command, shell=True)
with open(js_out, 'wb+') as f:
with open(js_out, "wb+") as f:
f.write(output)
with open(js_out, 'rb') as f:
with open(js_out, "rb") as f:
js_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8]
logging.info(js_digest)
if args.minify:
logging.info('Minifying website')
logging.info("Minifying website")
with concurrent.futures.ThreadPoolExecutor() as executor:
futures = []
for root, _, filenames in os.walk(args.output_dir):
for filename in filenames:
path = os.path.join(root, filename)
futures.append(executor.submit(minify_file, path, css_digest, js_digest))
futures.append(
executor.submit(minify_file, path, css_digest, js_digest)
)
for future in futures:
exc = future.exception()
if exc:
@ -304,24 +318,28 @@ def minify_website(args):
def process_benchmark_results(args):
benchmark_root = os.path.join(args.website_dir, 'benchmark')
benchmark_root = os.path.join(args.website_dir, "benchmark")
required_keys = {
'dbms': ['result'],
'hardware': ['result', 'system', 'system_full', 'kind']
"dbms": ["result"],
"hardware": ["result", "system", "system_full", "kind"],
}
for benchmark_kind in ['dbms', 'hardware']:
for benchmark_kind in ["dbms", "hardware"]:
results = []
results_root = os.path.join(benchmark_root, benchmark_kind, 'results')
results_root = os.path.join(benchmark_root, benchmark_kind, "results")
for result in sorted(os.listdir(results_root)):
result_file = os.path.join(results_root, result)
logging.debug(f'Reading benchmark result from {result_file}')
with open(result_file, 'r') as f:
logging.debug(f"Reading benchmark result from {result_file}")
with open(result_file, "r") as f:
result = json.loads(f.read())
for item in result:
for required_key in required_keys[benchmark_kind]:
assert required_key in item, f'No "{required_key}" in {result_file}'
assert (
required_key in item
), f'No "{required_key}" in {result_file}'
results += result
results_js = os.path.join(args.output_dir, 'benchmark', benchmark_kind, 'results.js')
with open(results_js, 'w') as f:
results_js = os.path.join(
args.output_dir, "benchmark", benchmark_kind, "results.js"
)
with open(results_js, "w") as f:
data = json.dumps(results)
f.write(f'var results = {data};')
f.write(f"var results = {data};")

View File

@ -42,6 +42,8 @@ git push
Use the `utils/check-style/check-style` binary to run some simple regex-based code style checks (note that it can be run locally).
If it fails, fix the style errors according to the [code style guide](./style.md).
Python code is checked with [black](https://github.com/psf/black/).
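For reference, a minimal local sketch of that check could look like the following (it assumes black is installed, e.g. via `pip install black`; the paths are only illustrative, not a fixed list):

# Sketch: run black in check-only mode before pushing (paths here are illustrative).
import subprocess

subprocess.check_call(
    ["python3", "-m", "black", "--check", "--diff", "tests/ci", "docs/tools"]
)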
### Report Details {#report-details}
- [Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check.html)
- `docs_output.txt` records the check result errors (invalid tables, etc.); an empty page means there are no errors. [Example of a successful result](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check/output.txt)

View File

@ -7,16 +7,14 @@ import string
TOKEN_TEXT = 1
TOKEN_VAR = 2
TOKEN_COLON = ':'
TOKEN_SEMI = ';'
TOKEN_OR = '|'
TOKEN_QUESTIONMARK = '?'
TOKEN_ROUND_BRACKET_OPEN = '('
TOKEN_ROUND_BRACKET_CLOSE = ')'
TOKEN_ASTERISK = '*'
TOKEN_SLASH = '/'
TOKEN_COLON = ":"
TOKEN_SEMI = ";"
TOKEN_OR = "|"
TOKEN_QUESTIONMARK = "?"
TOKEN_ROUND_BRACKET_OPEN = "("
TOKEN_ROUND_BRACKET_CLOSE = ")"
TOKEN_ASTERISK = "*"
TOKEN_SLASH = "/"
class TextValue:
@ -27,9 +25,9 @@ class TextValue:
def get_slug(self):
if self.slug is not None:
return self.slug
slug = ''
slug = ""
for c in self.t:
slug += c if c in string.ascii_letters else '_'
slug += c if c in string.ascii_letters else "_"
self.slug = slug
return slug
@ -37,12 +35,12 @@ class TextValue:
return f"TextValue_{self.get_slug()}"
def __repr__(self):
return f"TextValue(\"{self.t}\")"
return f'TextValue("{self.t}")'
class Var:
def __init__(self, id_):
self.id_ = id_
self.id_ = id_
def __repr__(self):
return f"Var({self.id_})"
@ -59,8 +57,8 @@ class Parser:
self.cur_tok = None
self.includes = []
self.proto = ''
self.cpp = ''
self.proto = ""
self.cpp = ""
def parse_file(self, filename):
with open(filename) as f:
@ -81,7 +79,7 @@ class Parser:
if self.text[0] == '"':
return self.parse_txt_value()
if self.text[0] == '$':
if self.text[0] == "$":
return self.parse_var_value()
c, self.text = self.text[0], self.text[1:]
@ -89,9 +87,9 @@ class Parser:
return c
def parse_var_value(self):
i = self.text.find(' ')
i = self.text.find(" ")
id_, self.text = self.text[1:i], self.text[i+1:]
id_, self.text = self.text[1:i], self.text[i + 1 :]
self.var_id = int(id_)
self.cur_tok = TOKEN_VAR
return TOKEN_VAR
@ -100,12 +98,12 @@ class Parser:
if self.text[0] != '"':
raise Exception("parse_txt_value: expected quote at the start")
self.t = ''
self.t = ""
self.text = self.text[1:]
while self.text[0] != '"':
if self.text[0] == '\\':
if self.text[1] == 'x':
if self.text[0] == "\\":
if self.text[1] == "x":
self.t += self.text[:4]
self.text = self.text[4:]
elif self.text[1] in 'nt\\"':
@ -123,7 +121,7 @@ class Parser:
def skip_ws(self):
while self.text and self.text[0] in string.whitespace:
if self.text[0] == '\n':
if self.text[0] == "\n":
self.line += 1
self.col = 0
self.text = self.text[1:]
@ -134,10 +132,9 @@ class Parser:
def skip_line(self):
self.line += 1
index = self.text.find('\n')
index = self.text.find("\n")
self.text = self.text[index:]
def parse_statement(self):
if self.skip_ws() is None:
return None
@ -164,52 +161,54 @@ class Parser:
def generate(self):
self.proto = 'syntax = "proto3";\n\n'
self.cpp = '#include <iostream>\n#include <string>\n#include <vector>\n\n#include <libfuzzer/libfuzzer_macro.h>\n\n'
self.cpp = "#include <iostream>\n#include <string>\n#include <vector>\n\n#include <libfuzzer/libfuzzer_macro.h>\n\n"
for incl_file in self.includes:
self.cpp += f'#include "{incl_file}"\n'
self.cpp += '\n'
self.cpp += "\n"
self.proto += 'message Word {\n'
self.proto += '\tenum Value {\n'
self.proto += "message Word {\n"
self.proto += "\tenum Value {\n"
self.cpp += 'void GenerateWord(const Word&, std::string&, int);\n\n'
self.cpp += "void GenerateWord(const Word&, std::string&, int);\n\n"
self.cpp += 'void GenerateSentence(const Sentence& stc, std::string &s, int depth) {\n'
self.cpp += '\tfor (int i = 0; i < stc.words_size(); i++ ) {\n'
self.cpp += '\t\tGenerateWord(stc.words(i), s, ++depth);\n'
self.cpp += '\t}\n'
self.cpp += '}\n'
self.cpp += (
"void GenerateSentence(const Sentence& stc, std::string &s, int depth) {\n"
)
self.cpp += "\tfor (int i = 0; i < stc.words_size(); i++ ) {\n"
self.cpp += "\t\tGenerateWord(stc.words(i), s, ++depth);\n"
self.cpp += "\t}\n"
self.cpp += "}\n"
self.cpp += 'void GenerateWord(const Word& word, std::string &s, int depth) {\n'
self.cpp += "void GenerateWord(const Word& word, std::string &s, int depth) {\n"
self.cpp += '\tif (depth > 5) return;\n\n'
self.cpp += '\tswitch (word.value()) {\n'
self.cpp += "\tif (depth > 5) return;\n\n"
self.cpp += "\tswitch (word.value()) {\n"
for idx, chain in enumerate(self.chains):
self.proto += f'\t\tvalue_{idx} = {idx};\n'
self.proto += f"\t\tvalue_{idx} = {idx};\n"
self.cpp += f'\t\tcase {idx}: {{\n'
self.cpp += f"\t\tcase {idx}: {{\n"
num_var = 0
for item in chain:
if isinstance(item, TextValue):
self.cpp += f'\t\t\ts += "{item.t}";\n'
elif isinstance(item, Var):
self.cpp += f'\t\t\tif (word.inner().words_size() > {num_var})\t\t\t\tGenerateWord(word.inner().words({num_var}), s, ++depth);\n'
self.cpp += f"\t\t\tif (word.inner().words_size() > {num_var})\t\t\t\tGenerateWord(word.inner().words({num_var}), s, ++depth);\n"
num_var += 1
else:
raise Exception("unknown token met during generation")
self.cpp += '\t\t\tbreak;\n\t\t}\n'
self.cpp += '\t\tdefault: break;\n'
self.cpp += "\t\t\tbreak;\n\t\t}\n"
self.cpp += "\t\tdefault: break;\n"
self.cpp += '\t}\n'
self.cpp += "\t}\n"
self.proto += '\t}\n'
self.proto += '\tValue value = 1;\n'
self.proto += '\tSentence inner = 2;\n'
self.proto += '}\nmessage Sentence {\n\trepeated Word words = 1;\n}'
self.proto += "\t}\n"
self.proto += "\tValue value = 1;\n"
self.proto += "\tSentence inner = 2;\n"
self.proto += "}\nmessage Sentence {\n\trepeated Word words = 1;\n}"
self.cpp += '}\n'
self.cpp += "}\n"
return self.cpp, self.proto
def fatal_parsing_error(self, msg):
@ -220,7 +219,7 @@ class Parser:
def main(args):
input_file, outfile_cpp, outfile_proto = args
if not outfile_proto.endswith('.proto'):
if not outfile_proto.endswith(".proto"):
raise Exception("outfile_proto (argv[3]) should end with `.proto`")
include_filename = outfile_proto[:-6] + ".pb.h"
@ -231,17 +230,17 @@ def main(args):
cpp, proto = p.generate()
proto = proto.replace('\t', ' ' * 4)
cpp = cpp.replace('\t', ' ' * 4)
proto = proto.replace("\t", " " * 4)
cpp = cpp.replace("\t", " " * 4)
with open(outfile_cpp, 'w') as f:
with open(outfile_cpp, "w") as f:
f.write(cpp)
with open(outfile_proto, 'w') as f:
with open(outfile_proto, "w") as f:
f.write(proto)
if __name__ == '__main__':
if __name__ == "__main__":
if len(sys.argv) < 3:
print(f"Usage {sys.argv[0]} <input_file> <outfile.cpp> <outfile.proto>")
sys.exit(1)

View File

@ -9,7 +9,9 @@ import re
parts = {}
for s in sys.stdin.read().split():
m = re.match('^([0-9]{6})[0-9]{2}_([0-9]{6})[0-9]{2}_([0-9]+)_([0-9]+)_([0-9]+)$', s)
m = re.match(
"^([0-9]{6})[0-9]{2}_([0-9]{6})[0-9]{2}_([0-9]+)_([0-9]+)_([0-9]+)$", s
)
    if m is None:
continue
m1 = m.group(1)
@ -18,7 +20,7 @@ for s in sys.stdin.read().split():
i2 = int(m.group(4))
l = int(m.group(5))
if m1 != m2:
raise Exception('not in single month: ' + s)
raise Exception("not in single month: " + s)
if m1 not in parts:
parts[m1] = []
parts[m1].append((i1, i2, l, s))
@ -27,13 +29,13 @@ for m, ps in sorted(parts.items()):
ps.sort(key=lambda i1_i2_l_s: (i1_i2_l_s[0], -i1_i2_l_s[1], -i1_i2_l_s[2]))
(x2, y2, l2, s2) = (-1, -1, -1, -1)
for x1, y1, l1, s1 in ps:
if x1 >= x2 and y1 <= y2 and l1 < l2 and (x1, y1) != (x2, y2): # 2 contains 1
if x1 >= x2 and y1 <= y2 and l1 < l2 and (x1, y1) != (x2, y2): # 2 contains 1
pass
elif x1 > y2: # 1 is to the right of 2
elif x1 > y2: # 1 is to the right of 2
if x1 != y2 + 1 and y2 != -1:
print() # to see the missing numbers
print() # to see the missing numbers
(x2, y2, l2, s2) = (x1, y1, l1, s1)
print(s1)
else:
raise Exception('invalid parts intersection: ' + s1 + ' and ' + s2)
raise Exception("invalid parts intersection: " + s1 + " and " + s2)
print()

View File

@ -7,8 +7,14 @@ import sys
from github import Github
from env_helper import GITHUB_REPOSITORY, TEMP_PATH, REPO_COPY, REPORTS_PATH, GITHUB_SERVER_URL, \
GITHUB_RUN_ID
from env_helper import (
GITHUB_REPOSITORY,
TEMP_PATH,
REPO_COPY,
REPORTS_PATH,
GITHUB_SERVER_URL,
GITHUB_RUN_ID,
)
from s3_helper import S3Helper
from get_robot_token import get_best_robot_token
from pr_info import PRInfo
@ -19,19 +25,24 @@ from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickh
from stopwatch import Stopwatch
from rerun_helper import RerunHelper
IMAGE_NAME = 'clickhouse/fuzzer'
IMAGE_NAME = "clickhouse/fuzzer"
def get_run_command(pr_number, sha, download_url, workspace_path, image):
return f'docker run --network=host --volume={workspace_path}:/workspace ' \
'--cap-add syslog --cap-add sys_admin --cap-add=SYS_PTRACE ' \
f'-e PR_TO_TEST={pr_number} -e SHA_TO_TEST={sha} -e BINARY_URL_TO_DOWNLOAD="{download_url}" '\
f'{image}'
return (
f"docker run --network=host --volume={workspace_path}:/workspace "
"--cap-add syslog --cap-add sys_admin --cap-add=SYS_PTRACE "
f'-e PR_TO_TEST={pr_number} -e SHA_TO_TEST={sha} -e BINARY_URL_TO_DOWNLOAD="{download_url}" '
f"{image}"
)
def get_commit(gh, commit_sha):
repo = gh.get_repo(GITHUB_REPOSITORY)
commit = repo.get_commit(commit_sha)
return commit
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
@ -64,7 +75,7 @@ if __name__ == "__main__":
raise Exception("No build URLs found")
for url in urls:
if url.endswith('/clickhouse'):
if url.endswith("/clickhouse"):
build_url = url
break
else:
@ -72,16 +83,20 @@ if __name__ == "__main__":
logging.info("Got build url %s", build_url)
workspace_path = os.path.join(temp_path, 'workspace')
workspace_path = os.path.join(temp_path, "workspace")
if not os.path.exists(workspace_path):
os.makedirs(workspace_path)
run_command = get_run_command(pr_info.number, pr_info.sha, build_url, workspace_path, docker_image)
run_command = get_run_command(
pr_info.number, pr_info.sha, build_url, workspace_path, docker_image
)
logging.info("Going to run %s", run_command)
run_log_path = os.path.join(temp_path, "runlog.log")
with open(run_log_path, 'w', encoding='utf-8') as log:
with subprocess.Popen(run_command, shell=True, stderr=log, stdout=log) as process:
with open(run_log_path, "w", encoding="utf-8") as log:
with subprocess.Popen(
run_command, shell=True, stderr=log, stdout=log
) as process:
retcode = process.wait()
if retcode == 0:
logging.info("Run successfully")
@ -90,56 +105,70 @@ if __name__ == "__main__":
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
check_name_lower = check_name.lower().replace('(', '').replace(')', '').replace(' ', '')
s3_prefix = f'{pr_info.number}/{pr_info.sha}/fuzzer_{check_name_lower}/'
check_name_lower = (
check_name.lower().replace("(", "").replace(")", "").replace(" ", "")
)
s3_prefix = f"{pr_info.number}/{pr_info.sha}/fuzzer_{check_name_lower}/"
paths = {
'runlog.log': run_log_path,
'main.log': os.path.join(workspace_path, 'main.log'),
'server.log': os.path.join(workspace_path, 'server.log'),
'fuzzer.log': os.path.join(workspace_path, 'fuzzer.log'),
'report.html': os.path.join(workspace_path, 'report.html'),
'core.gz': os.path.join(workspace_path, 'core.gz'),
"runlog.log": run_log_path,
"main.log": os.path.join(workspace_path, "main.log"),
"server.log": os.path.join(workspace_path, "server.log"),
"fuzzer.log": os.path.join(workspace_path, "fuzzer.log"),
"report.html": os.path.join(workspace_path, "report.html"),
"core.gz": os.path.join(workspace_path, "core.gz"),
}
s3_helper = S3Helper('https://s3.amazonaws.com')
s3_helper = S3Helper("https://s3.amazonaws.com")
for f in paths:
try:
paths[f] = s3_helper.upload_test_report_to_s3(paths[f], s3_prefix + '/' + f)
paths[f] = s3_helper.upload_test_report_to_s3(paths[f], s3_prefix + "/" + f)
except Exception as ex:
logging.info("Exception uploading file %s text %s", f, ex)
paths[f] = ''
paths[f] = ""
report_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}"
if paths['runlog.log']:
report_url = paths['runlog.log']
if paths['main.log']:
report_url = paths['main.log']
if paths['server.log']:
report_url = paths['server.log']
if paths['fuzzer.log']:
report_url = paths['fuzzer.log']
if paths['report.html']:
report_url = paths['report.html']
if paths["runlog.log"]:
report_url = paths["runlog.log"]
if paths["main.log"]:
report_url = paths["main.log"]
if paths["server.log"]:
report_url = paths["server.log"]
if paths["fuzzer.log"]:
report_url = paths["fuzzer.log"]
if paths["report.html"]:
report_url = paths["report.html"]
# Try to get status message saved by the fuzzer
try:
with open(os.path.join(workspace_path, 'status.txt'), 'r', encoding='utf-8') as status_f:
status = status_f.readline().rstrip('\n')
with open(
os.path.join(workspace_path, "status.txt"), "r", encoding="utf-8"
) as status_f:
status = status_f.readline().rstrip("\n")
with open(os.path.join(workspace_path, 'description.txt'), 'r', encoding='utf-8') as desc_f:
description = desc_f.readline().rstrip('\n')[:140]
with open(
os.path.join(workspace_path, "description.txt"), "r", encoding="utf-8"
) as desc_f:
description = desc_f.readline().rstrip("\n")[:140]
except:
status = 'failure'
description = 'Task failed: $?=' + str(retcode)
status = "failure"
description = "Task failed: $?=" + str(retcode)
if 'fail' in status:
test_result = [(description, 'FAIL')]
if "fail" in status:
test_result = [(description, "FAIL")]
else:
test_result = [(description, 'OK')]
test_result = [(description, "OK")]
ch_helper = ClickHouseHelper()
prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_result, status, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name)
prepared_events = prepare_tests_results_for_clickhouse(
pr_info,
test_result,
status,
stopwatch.duration_seconds,
stopwatch.start_time_str,
report_url,
check_name,
)
logging.info("Result: '%s', '%s', '%s'", status, description, report_url)
print(f"::notice ::Report url: {report_url}")

View File

@ -6,20 +6,20 @@ import itertools
import os
import sys
NO_CHANGES_MSG = 'Nothing to run'
NO_CHANGES_MSG = "Nothing to run"
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('report1')
parser.add_argument('report2')
parser.add_argument("report1")
parser.add_argument("report2")
return parser.parse_args()
def post_commit_status_from_file(file_path):
res = []
with open(file_path, 'r', encoding='utf-8') as f:
fin = csv.reader(f, delimiter='\t')
with open(file_path, "r", encoding="utf-8") as f:
fin = csv.reader(f, delimiter="\t")
res = list(itertools.islice(fin, 1))
if len(res) < 1:
raise Exception(f'Can\'t read from "{file_path}"')
@ -31,8 +31,10 @@ def post_commit_status_from_file(file_path):
def process_results(file_path):
state, report_url, description = post_commit_status_from_file(file_path)
prefix = os.path.basename(os.path.dirname(file_path))
print(f'::notice:: bugfix check: {prefix} - {state}: {description} Report url: {report_url}')
return state == 'success'
print(
f"::notice:: bugfix check: {prefix} - {state}: {description} Report url: {report_url}"
)
return state == "success"
def main(args):
@ -42,5 +44,5 @@ def main(args):
sys.exit(0 if is_ok else 1)
if __name__ == '__main__':
if __name__ == "__main__":
main(parse_args())

View File

@ -6,7 +6,13 @@ import os
import sys
from github import Github
from env_helper import REPORTS_PATH, TEMP_PATH, GITHUB_REPOSITORY, GITHUB_SERVER_URL, GITHUB_RUN_ID
from env_helper import (
REPORTS_PATH,
TEMP_PATH,
GITHUB_REPOSITORY,
GITHUB_SERVER_URL,
GITHUB_RUN_ID,
)
from report import create_build_html_report
from s3_helper import S3Helper
from get_robot_token import get_best_robot_token
@ -15,8 +21,19 @@ from commit_status_helper import get_commit
from ci_config import CI_CONFIG
from rerun_helper import RerunHelper
class BuildResult():
def __init__(self, compiler, build_type, sanitizer, bundled, splitted, status, elapsed_seconds, with_coverage):
class BuildResult:
def __init__(
self,
compiler,
build_type,
sanitizer,
bundled,
splitted,
status,
elapsed_seconds,
with_coverage,
):
self.compiler = compiler
self.build_type = build_type
self.sanitizer = sanitizer
@ -26,56 +43,72 @@ class BuildResult():
self.elapsed_seconds = elapsed_seconds
self.with_coverage = with_coverage
def group_by_artifacts(build_urls):
groups = {'apk': [],'deb': [], 'binary': [], 'tgz': [], 'rpm': [], 'performance': []}
groups = {
"apk": [],
"deb": [],
"binary": [],
"tgz": [],
"rpm": [],
"performance": [],
}
for url in build_urls:
if url.endswith('performance.tgz'):
groups['performance'].append(url)
elif url.endswith('.deb') or url.endswith('.buildinfo') or url.endswith('.changes') or url.endswith('.tar.gz'):
groups['deb'].append(url)
elif url.endswith('.apk'):
groups['apk'].append(url)
elif url.endswith('.rpm'):
groups['rpm'].append(url)
elif url.endswith('.tgz'):
groups['tgz'].append(url)
if url.endswith("performance.tgz"):
groups["performance"].append(url)
elif (
url.endswith(".deb")
or url.endswith(".buildinfo")
or url.endswith(".changes")
or url.endswith(".tar.gz")
):
groups["deb"].append(url)
elif url.endswith(".apk"):
groups["apk"].append(url)
elif url.endswith(".rpm"):
groups["rpm"].append(url)
elif url.endswith(".tgz"):
groups["tgz"].append(url)
else:
groups['binary'].append(url)
groups["binary"].append(url)
return groups
def process_report(build_report):
build_config = build_report['build_config']
build_config = build_report["build_config"]
build_result = BuildResult(
compiler=build_config['compiler'],
build_type=build_config['build_type'],
sanitizer=build_config['sanitizer'],
bundled=build_config['bundled'],
splitted=build_config['splitted'],
status="success" if build_report['status'] else "failure",
elapsed_seconds=build_report['elapsed_seconds'],
with_coverage=False
compiler=build_config["compiler"],
build_type=build_config["build_type"],
sanitizer=build_config["sanitizer"],
bundled=build_config["bundled"],
splitted=build_config["splitted"],
status="success" if build_report["status"] else "failure",
elapsed_seconds=build_report["elapsed_seconds"],
with_coverage=False,
)
build_results = []
build_urls = []
build_logs_urls = []
urls_groups = group_by_artifacts(build_report['build_urls'])
urls_groups = group_by_artifacts(build_report["build_urls"])
found_group = False
for _, group_urls in urls_groups.items():
if group_urls:
build_results.append(build_result)
build_urls.append(group_urls)
build_logs_urls.append(build_report['log_url'])
build_logs_urls.append(build_report["log_url"])
found_group = True
if not found_group:
build_results.append(build_result)
build_urls.append([""])
build_logs_urls.append(build_report['log_url'])
build_logs_urls.append(build_report["log_url"])
return build_results, build_urls, build_logs_urls
def get_build_name_from_file_name(file_name):
return file_name.replace('build_urls_', '').replace('.json', '')
return file_name.replace("build_urls_", "").replace(".json", "")
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
@ -101,17 +134,25 @@ if __name__ == "__main__":
build_reports_map = {}
for root, dirs, files in os.walk(reports_path):
for f in files:
if f.startswith("build_urls_") and f.endswith('.json'):
if f.startswith("build_urls_") and f.endswith(".json"):
logging.info("Found build report json %s", f)
build_name = get_build_name_from_file_name(f)
if build_name in reports_order:
with open(os.path.join(root, f), 'r') as file_handler:
with open(os.path.join(root, f), "r") as file_handler:
build_report = json.load(file_handler)
build_reports_map[build_name] = build_report
else:
logging.info("Skipping report %s for build %s, it's not in our reports list", f, build_name)
logging.info(
"Skipping report %s for build %s, it's not in our reports list",
f,
build_name,
)
build_reports = [build_reports_map[build_name] for build_name in reports_order if build_name in build_reports_map]
build_reports = [
build_reports_map[build_name]
for build_name in reports_order
if build_name in build_reports_map
]
build_results = []
build_artifacts = []
@ -129,7 +170,7 @@ if __name__ == "__main__":
logging.info("No builds, failing check")
sys.exit(1)
s3_helper = S3Helper('https://s3.amazonaws.com')
s3_helper = S3Helper("https://s3.amazonaws.com")
pr_info = PRInfo()
@ -139,7 +180,9 @@ if __name__ == "__main__":
branch_name = "PR #{}".format(pr_info.number)
branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/pull/{pr_info.number}"
commit_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commit/{pr_info.sha}"
task_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID or '0'}"
task_url = (
f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID or '0'}"
)
report = create_build_html_report(
build_check_name,
build_results,
@ -148,18 +191,22 @@ if __name__ == "__main__":
task_url,
branch_url,
branch_name,
commit_url
commit_url,
)
report_path = os.path.join(temp_path, 'report.html')
with open(report_path, 'w') as f:
report_path = os.path.join(temp_path, "report.html")
with open(report_path, "w") as f:
f.write(report)
logging.info("Going to upload prepared report")
context_name_for_path = build_check_name.lower().replace(' ', '_')
s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + context_name_for_path
context_name_for_path = build_check_name.lower().replace(" ", "_")
s3_path_prefix = (
str(pr_info.number) + "/" + pr_info.sha + "/" + context_name_for_path
)
url = s3_helper.upload_build_file_to_s3(report_path, s3_path_prefix + "/report.html")
url = s3_helper.upload_build_file_to_s3(
report_path, s3_path_prefix + "/report.html"
)
logging.info("Report url %s", url)
total_builds = len(build_results)
@ -182,4 +229,9 @@ if __name__ == "__main__":
print("::notice ::Report url: {}".format(url))
commit = get_commit(gh, pr_info.sha)
commit.create_status(context=build_check_name, description=description, state=summary_status, target_url=url)
commit.create_status(
context=build_check_name,
description=description,
state=summary_status,
target_url=url,
)

View File

@ -13,16 +13,19 @@ from compress_files import decompress_fast, compress_fast
DOWNLOAD_RETRIES_COUNT = 5
def dowload_file_with_progress(url, path):
logging.info("Downloading from %s to temp path %s", url, path)
for i in range(DOWNLOAD_RETRIES_COUNT):
try:
with open(path, 'wb') as f:
with open(path, "wb") as f:
response = requests.get(url, stream=True)
response.raise_for_status()
total_length = response.headers.get('content-length')
total_length = response.headers.get("content-length")
if total_length is None or int(total_length) == 0:
logging.info("No content-length, will download file without progress")
logging.info(
"No content-length, will download file without progress"
)
f.write(response.content)
else:
dl = 0
@ -34,8 +37,8 @@ def dowload_file_with_progress(url, path):
if sys.stdout.isatty():
done = int(50 * dl / total_length)
percent = int(100 * float(dl) / total_length)
eq_str = '=' * done
space_str = ' ' * (50 - done)
eq_str = "=" * done
space_str = " " * (50 - done)
sys.stdout.write(f"\r[{eq_str}{space_str}] {percent}%")
sys.stdout.flush()
break
@ -52,7 +55,9 @@ def dowload_file_with_progress(url, path):
logging.info("Downloading finished")
def get_ccache_if_not_exists(path_to_ccache_dir, s3_helper, current_pr_number, temp_path):
def get_ccache_if_not_exists(
path_to_ccache_dir, s3_helper, current_pr_number, temp_path
):
ccache_name = os.path.basename(path_to_ccache_dir)
cache_found = False
prs_to_check = [current_pr_number]
@ -93,13 +98,16 @@ def get_ccache_if_not_exists(path_to_ccache_dir, s3_helper, current_pr_number, t
else:
logging.info("ccache downloaded")
def upload_ccache(path_to_ccache_dir, s3_helper, current_pr_number, temp_path):
logging.info("Uploading cache %s for pr %s", path_to_ccache_dir, current_pr_number)
ccache_name = os.path.basename(path_to_ccache_dir)
compressed_cache_path = os.path.join(temp_path, ccache_name + ".tar.gz")
compress_fast(path_to_ccache_dir, compressed_cache_path)
s3_path = str(current_pr_number) + "/ccaches/" + os.path.basename(compressed_cache_path)
s3_path = (
str(current_pr_number) + "/ccaches/" + os.path.basename(compressed_cache_path)
)
logging.info("Will upload %s to path %s", compressed_cache_path, s3_path)
s3_helper.upload_build_file_to_s3(compressed_cache_path, s3_path)
logging.info("Upload finished")

View File

@ -20,21 +20,29 @@ if __name__ == "__main__":
if not os.path.exists(temp_path):
os.makedirs(temp_path)
sys.path.append(os.path.join(repo_path, "utils/github"))
with SSHKey("ROBOT_CLICKHOUSE_SSH_KEY"):
token = get_parameter_from_ssm("github_robot_token_1")
bp = Backport(token, os.environ.get("REPO_OWNER"), os.environ.get("REPO_NAME"), os.environ.get("REPO_TEAM"))
bp = Backport(
token,
os.environ.get("REPO_OWNER"),
os.environ.get("REPO_NAME"),
os.environ.get("REPO_TEAM"),
)
def cherrypick_run(token, pr, branch):
return CherryPick(token,
os.environ.get("REPO_OWNER"), os.environ.get("REPO_NAME"),
os.environ.get("REPO_TEAM"), pr, branch
).execute(repo_path, False)
return CherryPick(
token,
os.environ.get("REPO_OWNER"),
os.environ.get("REPO_NAME"),
os.environ.get("REPO_TEAM"),
pr,
branch,
).execute(repo_path, False)
try:
bp.execute(repo_path, 'origin', None, cherrypick_run)
bp.execute(repo_path, "origin", None, cherrypick_run)
except subprocess.CalledProcessError as e:
logging.error(e.output)

View File

@ -17,7 +17,9 @@ import sys
class Backport:
def __init__(self, token, owner, name, team):
self._gh = RemoteRepo(token, owner=owner, name=name, team=team, max_page_size=30, min_page_size=7)
self._gh = RemoteRepo(
token, owner=owner, name=name, team=team, max_page_size=30, min_page_size=7
)
self._token = token
self.default_branch_name = self._gh.default_branch
self.ssh_url = self._gh.ssh_url
@ -28,7 +30,7 @@ class Backport:
def getBranchesWithRelease(self):
branches = set()
for pull_request in self._gh.find_pull_requests("release"):
branches.add(pull_request['headRefName'])
branches.add(pull_request["headRefName"])
return branches
def execute(self, repo, upstream, until_commit, run_cherrypick):
@ -44,11 +46,11 @@ class Backport:
branches.append(branch)
if not branches:
logging.info('No release branches found!')
logging.info("No release branches found!")
return
for branch in branches:
logging.info('Found release branch: %s', branch[0])
logging.info("Found release branch: %s", branch[0])
if not until_commit:
until_commit = branches[0][1]
@ -56,73 +58,128 @@ class Backport:
backport_map = {}
RE_MUST_BACKPORT = re.compile(r'^v(\d+\.\d+)-must-backport$')
RE_NO_BACKPORT = re.compile(r'^v(\d+\.\d+)-no-backport$')
RE_BACKPORTED = re.compile(r'^v(\d+\.\d+)-backported$')
RE_MUST_BACKPORT = re.compile(r"^v(\d+\.\d+)-must-backport$")
RE_NO_BACKPORT = re.compile(r"^v(\d+\.\d+)-no-backport$")
RE_BACKPORTED = re.compile(r"^v(\d+\.\d+)-backported$")
# pull-requests are sorted by ancestry from the most recent.
for pr in pull_requests:
while repo.comparator(branches[-1][1]) >= repo.comparator(pr['mergeCommit']['oid']):
logging.info("PR #{} is already inside {}. Dropping this branch for further PRs".format(pr['number'], branches[-1][0]))
while repo.comparator(branches[-1][1]) >= repo.comparator(
pr["mergeCommit"]["oid"]
):
logging.info(
"PR #{} is already inside {}. Dropping this branch for further PRs".format(
pr["number"], branches[-1][0]
)
)
branches.pop()
logging.info("Processing PR #{}".format(pr['number']))
logging.info("Processing PR #{}".format(pr["number"]))
assert len(branches)
branch_set = set([branch[0] for branch in branches])
# First pass. Find all must-backports
for label in pr['labels']['nodes']:
if label['name'] == 'pr-must-backport':
backport_map[pr['number']] = branch_set.copy()
for label in pr["labels"]["nodes"]:
if label["name"] == "pr-must-backport":
backport_map[pr["number"]] = branch_set.copy()
continue
matched = RE_MUST_BACKPORT.match(label['name'])
matched = RE_MUST_BACKPORT.match(label["name"])
if matched:
if pr['number'] not in backport_map:
backport_map[pr['number']] = set()
backport_map[pr['number']].add(matched.group(1))
if pr["number"] not in backport_map:
backport_map[pr["number"]] = set()
backport_map[pr["number"]].add(matched.group(1))
# Second pass. Find all no-backports
for label in pr['labels']['nodes']:
if label['name'] == 'pr-no-backport' and pr['number'] in backport_map:
del backport_map[pr['number']]
for label in pr["labels"]["nodes"]:
if label["name"] == "pr-no-backport" and pr["number"] in backport_map:
del backport_map[pr["number"]]
break
matched_no_backport = RE_NO_BACKPORT.match(label['name'])
matched_backported = RE_BACKPORTED.match(label['name'])
if matched_no_backport and pr['number'] in backport_map and matched_no_backport.group(1) in backport_map[pr['number']]:
backport_map[pr['number']].remove(matched_no_backport.group(1))
logging.info('\tskipping %s because of forced no-backport', matched_no_backport.group(1))
elif matched_backported and pr['number'] in backport_map and matched_backported.group(1) in backport_map[pr['number']]:
backport_map[pr['number']].remove(matched_backported.group(1))
logging.info('\tskipping %s because it\'s already backported manually', matched_backported.group(1))
matched_no_backport = RE_NO_BACKPORT.match(label["name"])
matched_backported = RE_BACKPORTED.match(label["name"])
if (
matched_no_backport
and pr["number"] in backport_map
and matched_no_backport.group(1) in backport_map[pr["number"]]
):
backport_map[pr["number"]].remove(matched_no_backport.group(1))
logging.info(
"\tskipping %s because of forced no-backport",
matched_no_backport.group(1),
)
elif (
matched_backported
and pr["number"] in backport_map
and matched_backported.group(1) in backport_map[pr["number"]]
):
backport_map[pr["number"]].remove(matched_backported.group(1))
logging.info(
"\tskipping %s because it's already backported manually",
matched_backported.group(1),
)
for pr, branches in list(backport_map.items()):
logging.info('PR #%s needs to be backported to:', pr)
logging.info("PR #%s needs to be backported to:", pr)
for branch in branches:
logging.info('\t%s, and the status is: %s', branch, run_cherrypick(self._token, pr, branch))
logging.info(
"\t%s, and the status is: %s",
branch,
run_cherrypick(self._token, pr, branch),
)
# print API costs
logging.info('\nGitHub API total costs per query:')
logging.info("\nGitHub API total costs per query:")
for name, value in list(self._gh.api_costs.items()):
logging.info('%s : %s', name, value)
logging.info("%s : %s", name, value)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('--token', type=str, required=True, help='token for Github access')
parser.add_argument('--repo', type=str, required=True, help='path to full repository', metavar='PATH')
parser.add_argument('--til', type=str, help='check PRs from HEAD til this commit', metavar='COMMIT')
parser.add_argument('--dry-run', action='store_true', help='do not create or merge any PRs', default=False)
parser.add_argument('--verbose', '-v', action='store_true', help='more verbose output', default=False)
parser.add_argument('--upstream', '-u', type=str, help='remote name of upstream in repository', default='origin')
parser.add_argument(
"--token", type=str, required=True, help="token for Github access"
)
parser.add_argument(
"--repo",
type=str,
required=True,
help="path to full repository",
metavar="PATH",
)
parser.add_argument(
"--til", type=str, help="check PRs from HEAD til this commit", metavar="COMMIT"
)
parser.add_argument(
"--dry-run",
action="store_true",
help="do not create or merge any PRs",
default=False,
)
parser.add_argument(
"--verbose",
"-v",
action="store_true",
help="more verbose output",
default=False,
)
parser.add_argument(
"--upstream",
"-u",
type=str,
help="remote name of upstream in repository",
default="origin",
)
args = parser.parse_args()
if args.verbose:
logging.basicConfig(format='%(message)s', stream=sys.stdout, level=logging.DEBUG)
logging.basicConfig(
format="%(message)s", stream=sys.stdout, level=logging.DEBUG
)
else:
logging.basicConfig(format='%(message)s', stream=sys.stdout, level=logging.INFO)
logging.basicConfig(format="%(message)s", stream=sys.stdout, level=logging.INFO)
cherrypick_run = lambda token, pr, branch: CherryPick(token, 'ClickHouse', 'ClickHouse', 'core', pr, branch).execute(args.repo, args.dry_run)
bp = Backport(args.token, 'ClickHouse', 'ClickHouse', 'core')
cherrypick_run = lambda token, pr, branch: CherryPick(
token, "ClickHouse", "ClickHouse", "core", pr, branch
).execute(args.repo, args.dry_run)
bp = Backport(args.token, "ClickHouse", "ClickHouse", "core")
bp.execute(args.repo, args.upstream, args.til, cherrypick_run)
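For illustration, here is a minimal, self-contained sketch of the label-driven filtering performed in Backport.execute above; the regular expressions and label names are assumptions for the example, not the exact patterns defined elsewhere in this script.
import re

# Assumed label formats; the real RE_NO_BACKPORT / RE_BACKPORTED patterns are defined above this hunk.
RE_NO_BACKPORT = re.compile(r"^v(\d+\.\d+)-no-backport$")
RE_BACKPORTED = re.compile(r"^v(\d+\.\d+)-backported$")

backport_map = {1234: {"21.8", "21.9"}}  # PR number -> release branches still pending a backport

for label_name in ("v21.8-no-backport", "v21.9-backported"):
    matched_no_backport = RE_NO_BACKPORT.match(label_name)
    matched_backported = RE_BACKPORTED.match(label_name)
    if matched_no_backport and matched_no_backport.group(1) in backport_map[1234]:
        backport_map[1234].remove(matched_no_backport.group(1))  # forced no-backport
    elif matched_backported and matched_backported.group(1) in backport_map[1234]:
        backport_map[1234].remove(matched_backported.group(1))  # already backported manually

print(backport_map)  # {1234: set()}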

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
'''
"""
Backports changes from PR to release branch.
Requires multiple separate runs as part of the implementation.
@ -12,7 +12,7 @@ First run should do the following:
Second run checks that the PR from the previous run has been merged, or is at least mergeable; if it is not merged yet, it tries to merge it.
Third run creates a PR from the backport branch (with the previous PR merged) to the release branch.
'''
"""
try:
from clickhouse.utils.github.query import Query as RemoteRepo
@ -29,13 +29,13 @@ import sys
class CherryPick:
class Status(Enum):
DISCARDED = 'discarded'
NOT_INITIATED = 'not started'
FIRST_MERGEABLE = 'waiting for 1st stage'
FIRST_CONFLICTS = 'conflicts on 1st stage'
SECOND_MERGEABLE = 'waiting for 2nd stage'
SECOND_CONFLICTS = 'conflicts on 2nd stage'
MERGED = 'backported'
DISCARDED = "discarded"
NOT_INITIATED = "not started"
FIRST_MERGEABLE = "waiting for 1st stage"
FIRST_CONFLICTS = "conflicts on 1st stage"
SECOND_MERGEABLE = "waiting for 2nd stage"
SECOND_CONFLICTS = "conflicts on 2nd stage"
MERGED = "backported"
def _run(self, args):
out = subprocess.check_output(args).rstrip()
@ -50,51 +50,90 @@ class CherryPick:
# TODO: check if pull-request is merged.
self.merge_commit_oid = self._pr['mergeCommit']['oid']
self.merge_commit_oid = self._pr["mergeCommit"]["oid"]
self.target_branch = target_branch
self.backport_branch = 'backport/{branch}/{pr}'.format(branch=target_branch, pr=pr_number)
self.cherrypick_branch = 'cherrypick/{branch}/{oid}'.format(branch=target_branch, oid=self.merge_commit_oid)
self.backport_branch = "backport/{branch}/{pr}".format(
branch=target_branch, pr=pr_number
)
self.cherrypick_branch = "cherrypick/{branch}/{oid}".format(
branch=target_branch, oid=self.merge_commit_oid
)
def getCherryPickPullRequest(self):
return self._gh.find_pull_request(base=self.backport_branch, head=self.cherrypick_branch)
return self._gh.find_pull_request(
base=self.backport_branch, head=self.cherrypick_branch
)
def createCherryPickPullRequest(self, repo_path):
DESCRIPTION = (
'This pull-request is a first step of an automated backporting.\n'
'It contains changes like after calling a local command `git cherry-pick`.\n'
'If you intend to continue backporting this changes, then resolve all conflicts if any.\n'
'Otherwise, if you do not want to backport them, then just close this pull-request.\n'
'\n'
'The check results does not matter at this step - you can safely ignore them.\n'
'Also this pull-request will be merged automatically as it reaches the mergeable state, but you always can merge it manually.\n'
"This pull-request is a first step of an automated backporting.\n"
"It contains changes like after calling a local command `git cherry-pick`.\n"
"If you intend to continue backporting this changes, then resolve all conflicts if any.\n"
"Otherwise, if you do not want to backport them, then just close this pull-request.\n"
"\n"
"The check results does not matter at this step - you can safely ignore them.\n"
"Also this pull-request will be merged automatically as it reaches the mergeable state, but you always can merge it manually.\n"
)
# FIXME: replace with something better than os.system()
git_prefix = ['git', '-C', repo_path, '-c', 'user.email=robot-clickhouse@yandex-team.ru', '-c', 'user.name=robot-clickhouse']
base_commit_oid = self._pr['mergeCommit']['parents']['nodes'][0]['oid']
git_prefix = [
"git",
"-C",
repo_path,
"-c",
"user.email=robot-clickhouse@yandex-team.ru",
"-c",
"user.name=robot-clickhouse",
]
base_commit_oid = self._pr["mergeCommit"]["parents"]["nodes"][0]["oid"]
# Create separate branch for backporting, and make it look like real cherry-pick.
self._run(git_prefix + ['checkout', '-f', self.target_branch])
self._run(git_prefix + ['checkout', '-B', self.backport_branch])
self._run(git_prefix + ['merge', '-s', 'ours', '--no-edit', base_commit_oid])
self._run(git_prefix + ["checkout", "-f", self.target_branch])
self._run(git_prefix + ["checkout", "-B", self.backport_branch])
self._run(git_prefix + ["merge", "-s", "ours", "--no-edit", base_commit_oid])
# Create secondary branch to allow pull request with cherry-picked commit.
self._run(git_prefix + ['branch', '-f', self.cherrypick_branch, self.merge_commit_oid])
self._run(
git_prefix + ["branch", "-f", self.cherrypick_branch, self.merge_commit_oid]
)
self._run(git_prefix + ['push', '-f', 'origin', '{branch}:{branch}'.format(branch=self.backport_branch)])
self._run(git_prefix + ['push', '-f', 'origin', '{branch}:{branch}'.format(branch=self.cherrypick_branch)])
self._run(
git_prefix
+ [
"push",
"-f",
"origin",
"{branch}:{branch}".format(branch=self.backport_branch),
]
)
self._run(
git_prefix
+ [
"push",
"-f",
"origin",
"{branch}:{branch}".format(branch=self.cherrypick_branch),
]
)
# Create pull-request like a local cherry-pick
pr = self._gh.create_pull_request(source=self.cherrypick_branch, target=self.backport_branch,
title='Cherry pick #{number} to {target}: {title}'.format(
number=self._pr['number'], target=self.target_branch,
title=self._pr['title'].replace('"', '\\"')),
description='Original pull-request #{}\n\n{}'.format(self._pr['number'], DESCRIPTION))
pr = self._gh.create_pull_request(
source=self.cherrypick_branch,
target=self.backport_branch,
title="Cherry pick #{number} to {target}: {title}".format(
number=self._pr["number"],
target=self.target_branch,
title=self._pr["title"].replace('"', '\\"'),
),
description="Original pull-request #{}\n\n{}".format(
self._pr["number"], DESCRIPTION
),
)
# FIXME: use `team` to leave a single eligible assignee.
self._gh.add_assignee(pr, self._pr['author'])
self._gh.add_assignee(pr, self._pr['mergedBy'])
self._gh.add_assignee(pr, self._pr["author"])
self._gh.add_assignee(pr, self._pr["mergedBy"])
self._gh.set_label(pr, "do not test")
self._gh.set_label(pr, "pr-cherrypick")
@ -102,36 +141,76 @@ class CherryPick:
return pr
def mergeCherryPickPullRequest(self, cherrypick_pr):
return self._gh.merge_pull_request(cherrypick_pr['id'])
return self._gh.merge_pull_request(cherrypick_pr["id"])
def getBackportPullRequest(self):
return self._gh.find_pull_request(base=self.target_branch, head=self.backport_branch)
return self._gh.find_pull_request(
base=self.target_branch, head=self.backport_branch
)
def createBackportPullRequest(self, cherrypick_pr, repo_path):
DESCRIPTION = (
'This pull-request is a last step of an automated backporting.\n'
'Treat it as a standard pull-request: look at the checks and resolve conflicts.\n'
'Merge it only if you intend to backport changes to the target branch, otherwise just close it.\n'
"This pull-request is a last step of an automated backporting.\n"
"Treat it as a standard pull-request: look at the checks and resolve conflicts.\n"
"Merge it only if you intend to backport changes to the target branch, otherwise just close it.\n"
)
git_prefix = ['git', '-C', repo_path, '-c', 'user.email=robot-clickhouse@clickhouse.com', '-c', 'user.name=robot-clickhouse']
git_prefix = [
"git",
"-C",
repo_path,
"-c",
"user.email=robot-clickhouse@clickhouse.com",
"-c",
"user.name=robot-clickhouse",
]
pr_title = 'Backport #{number} to {target}: {title}'.format(
number=self._pr['number'], target=self.target_branch,
title=self._pr['title'].replace('"', '\\"'))
pr_title = "Backport #{number} to {target}: {title}".format(
number=self._pr["number"],
target=self.target_branch,
title=self._pr["title"].replace('"', '\\"'),
)
self._run(git_prefix + ['checkout', '-f', self.backport_branch])
self._run(git_prefix + ['pull', '--ff-only', 'origin', self.backport_branch])
self._run(git_prefix + ['reset', '--soft', self._run(git_prefix + ['merge-base', 'origin/' + self.target_branch, self.backport_branch])])
self._run(git_prefix + ['commit', '-a', '--allow-empty', '-m', pr_title])
self._run(git_prefix + ['push', '-f', 'origin', '{branch}:{branch}'.format(branch=self.backport_branch)])
self._run(git_prefix + ["checkout", "-f", self.backport_branch])
self._run(git_prefix + ["pull", "--ff-only", "origin", self.backport_branch])
self._run(
git_prefix
+ [
"reset",
"--soft",
self._run(
git_prefix
+ [
"merge-base",
"origin/" + self.target_branch,
self.backport_branch,
]
),
]
)
self._run(git_prefix + ["commit", "-a", "--allow-empty", "-m", pr_title])
self._run(
git_prefix
+ [
"push",
"-f",
"origin",
"{branch}:{branch}".format(branch=self.backport_branch),
]
)
pr = self._gh.create_pull_request(source=self.backport_branch, target=self.target_branch, title=pr_title,
description='Original pull-request #{}\nCherry-pick pull-request #{}\n\n{}'.format(self._pr['number'], cherrypick_pr['number'], DESCRIPTION))
pr = self._gh.create_pull_request(
source=self.backport_branch,
target=self.target_branch,
title=pr_title,
description="Original pull-request #{}\nCherry-pick pull-request #{}\n\n{}".format(
self._pr["number"], cherrypick_pr["number"], DESCRIPTION
),
)
# FIXME: use `team` to leave a single eligible assignee.
self._gh.add_assignee(pr, self._pr['author'])
self._gh.add_assignee(pr, self._pr['mergedBy'])
self._gh.add_assignee(pr, self._pr["author"])
self._gh.add_assignee(pr, self._pr["mergedBy"])
self._gh.set_label(pr, "pr-backport")
@ -142,23 +221,43 @@ class CherryPick:
if not pr1:
if not dry_run:
pr1 = self.createCherryPickPullRequest(repo_path)
logging.debug('Created PR with cherry-pick of %s to %s: %s', self._pr['number'], self.target_branch, pr1['url'])
logging.debug(
"Created PR with cherry-pick of %s to %s: %s",
self._pr["number"],
self.target_branch,
pr1["url"],
)
else:
return CherryPick.Status.NOT_INITIATED
else:
logging.debug('Found PR with cherry-pick of %s to %s: %s', self._pr['number'], self.target_branch, pr1['url'])
logging.debug(
"Found PR with cherry-pick of %s to %s: %s",
self._pr["number"],
self.target_branch,
pr1["url"],
)
if not pr1['merged'] and pr1['mergeable'] == 'MERGEABLE' and not pr1['closed']:
if not pr1["merged"] and pr1["mergeable"] == "MERGEABLE" and not pr1["closed"]:
if not dry_run:
pr1 = self.mergeCherryPickPullRequest(pr1)
logging.debug('Merged PR with cherry-pick of %s to %s: %s', self._pr['number'], self.target_branch, pr1['url'])
logging.debug(
"Merged PR with cherry-pick of %s to %s: %s",
self._pr["number"],
self.target_branch,
pr1["url"],
)
if not pr1['merged']:
logging.debug('Waiting for PR with cherry-pick of %s to %s: %s', self._pr['number'], self.target_branch, pr1['url'])
if not pr1["merged"]:
logging.debug(
"Waiting for PR with cherry-pick of %s to %s: %s",
self._pr["number"],
self.target_branch,
pr1["url"],
)
if pr1['closed']:
if pr1["closed"]:
return CherryPick.Status.DISCARDED
elif pr1['mergeable'] == 'CONFLICTING':
elif pr1["mergeable"] == "CONFLICTING":
return CherryPick.Status.FIRST_CONFLICTS
else:
return CherryPick.Status.FIRST_MERGEABLE
@ -167,31 +266,58 @@ class CherryPick:
if not pr2:
if not dry_run:
pr2 = self.createBackportPullRequest(pr1, repo_path)
logging.debug('Created PR with backport of %s to %s: %s', self._pr['number'], self.target_branch, pr2['url'])
logging.debug(
"Created PR with backport of %s to %s: %s",
self._pr["number"],
self.target_branch,
pr2["url"],
)
else:
return CherryPick.Status.FIRST_MERGEABLE
else:
logging.debug('Found PR with backport of %s to %s: %s', self._pr['number'], self.target_branch, pr2['url'])
logging.debug(
"Found PR with backport of %s to %s: %s",
self._pr["number"],
self.target_branch,
pr2["url"],
)
if pr2['merged']:
if pr2["merged"]:
return CherryPick.Status.MERGED
elif pr2['closed']:
elif pr2["closed"]:
return CherryPick.Status.DISCARDED
elif pr2['mergeable'] == 'CONFLICTING':
elif pr2["mergeable"] == "CONFLICTING":
return CherryPick.Status.SECOND_CONFLICTS
else:
return CherryPick.Status.SECOND_MERGEABLE
if __name__ == "__main__":
logging.basicConfig(format='%(message)s', stream=sys.stdout, level=logging.DEBUG)
logging.basicConfig(format="%(message)s", stream=sys.stdout, level=logging.DEBUG)
parser = argparse.ArgumentParser()
parser.add_argument('--token', '-t', type=str, required=True, help='token for Github access')
parser.add_argument('--pr', type=str, required=True, help='PR# to cherry-pick')
parser.add_argument('--branch', '-b', type=str, required=True, help='target branch name for cherry-pick')
parser.add_argument('--repo', '-r', type=str, required=True, help='path to full repository', metavar='PATH')
parser.add_argument(
"--token", "-t", type=str, required=True, help="token for Github access"
)
parser.add_argument("--pr", type=str, required=True, help="PR# to cherry-pick")
parser.add_argument(
"--branch",
"-b",
type=str,
required=True,
help="target branch name for cherry-pick",
)
parser.add_argument(
"--repo",
"-r",
type=str,
required=True,
help="path to full repository",
metavar="PATH",
)
args = parser.parse_args()
cp = CherryPick(args.token, 'ClickHouse', 'ClickHouse', 'core', args.pr, args.branch)
cp = CherryPick(
args.token, "ClickHouse", "ClickHouse", "core", args.pr, args.branch
)
cp.execute(args.repo)
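As a rough sketch of the multi-run workflow described in the module docstring above, a driver could simply re-run the same CherryPick until it reports a terminal status. The import path, sleep interval and argument values are assumptions for illustration, not part of this script.
import time

from cherry_pick import CherryPick  # assumed module name for the file above

cp = CherryPick("<token>", "ClickHouse", "ClickHouse", "core", "1234", "21.8")
status = cp.execute("/path/to/repo")  # first run: create the cherry-pick PR
while status not in (CherryPick.Status.MERGED, CherryPick.Status.DISCARDED):
    time.sleep(3600)  # wait for CI or for manual conflict resolution
    status = cp.execute("/path/to/repo")  # later runs: merge stage 1, then open and track stage 2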

View File

@ -20,13 +20,14 @@ class RepositoryBase:
return -1
else:
return 1
self.comparator = functools.cmp_to_key(cmp)
def get_head_commit(self):
return self._repo.commit(self._default)
def iterate(self, begin, end):
rev_range = '{}...{}'.format(begin, end)
rev_range = "{}...{}".format(begin, end)
for commit in self._repo.iter_commits(rev_range, first_parent=True):
yield commit
@ -39,27 +40,35 @@ class Repository(RepositoryBase):
self._default = self._remote.refs[default_branch_name]
def get_release_branches(self):
'''
"""
Returns sorted list of tuples:
* remote branch (git.refs.remote.RemoteReference),
* base commit (git.Commit),
* head (git.Commit)).
List is sorted by commits in ascending order.
'''
"""
release_branches = []
RE_RELEASE_BRANCH_REF = re.compile(r'^refs/remotes/.+/\d+\.\d+$')
RE_RELEASE_BRANCH_REF = re.compile(r"^refs/remotes/.+/\d+\.\d+$")
for branch in [r for r in self._remote.refs if RE_RELEASE_BRANCH_REF.match(r.path)]:
for branch in [
r for r in self._remote.refs if RE_RELEASE_BRANCH_REF.match(r.path)
]:
base = self._repo.merge_base(self._default, self._repo.commit(branch))
if not base:
logging.info('Branch %s is not based on branch %s. Ignoring.', branch.path, self._default)
logging.info(
"Branch %s is not based on branch %s. Ignoring.",
branch.path,
self._default,
)
elif len(base) > 1:
logging.info('Branch %s has more than one base commit. Ignoring.', branch.path)
logging.info(
"Branch %s has more than one base commit. Ignoring.", branch.path
)
else:
release_branches.append((os.path.basename(branch.name), base[0]))
return sorted(release_branches, key=lambda x : self.comparator(x[1]))
return sorted(release_branches, key=lambda x: self.comparator(x[1]))
class BareRepository(RepositoryBase):
@ -68,24 +77,32 @@ class BareRepository(RepositoryBase):
self._default = self._repo.branches[default_branch_name]
def get_release_branches(self):
'''
"""
Returns sorted list of tuples:
* branch (git.refs.head?),
* base commit (git.Commit),
* head (git.Commit)).
List is sorted by commits in ascending order.
'''
"""
release_branches = []
RE_RELEASE_BRANCH_REF = re.compile(r'^refs/heads/\d+\.\d+$')
RE_RELEASE_BRANCH_REF = re.compile(r"^refs/heads/\d+\.\d+$")
for branch in [r for r in self._repo.branches if RE_RELEASE_BRANCH_REF.match(r.path)]:
for branch in [
r for r in self._repo.branches if RE_RELEASE_BRANCH_REF.match(r.path)
]:
base = self._repo.merge_base(self._default, self._repo.commit(branch))
if not base:
logging.info('Branch %s is not based on branch %s. Ignoring.', branch.path, self._default)
logging.info(
"Branch %s is not based on branch %s. Ignoring.",
branch.path,
self._default,
)
elif len(base) > 1:
logging.info('Branch %s has more than one base commit. Ignoring.', branch.path)
logging.info(
"Branch %s has more than one base commit. Ignoring.", branch.path
)
else:
release_branches.append((os.path.basename(branch.name), base[0]))
return sorted(release_branches, key=lambda x : self.comparator(x[1]))
return sorted(release_branches, key=lambda x: self.comparator(x[1]))
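A small standalone illustration of the release-branch filtering used by both repository classes above; the ref paths are made-up examples.
import re

RE_RELEASE_BRANCH_REF = re.compile(r"^refs/remotes/.+/\d+\.\d+$")
ref_paths = [
    "refs/remotes/origin/21.8",
    "refs/remotes/origin/master",
    "refs/remotes/origin/backport/21.8/1234",
    "refs/remotes/origin/21.9",
]
print([r for r in ref_paths if RE_RELEASE_BRANCH_REF.match(r)])
# ['refs/remotes/origin/21.8', 'refs/remotes/origin/21.9']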

View File

@ -1,19 +1,20 @@
# -*- coding: utf-8 -*-
class Description:
'''Parsed description representation
'''
"""Parsed description representation"""
MAP_CATEGORY_TO_LABEL = {
'New Feature': 'pr-feature',
'Bug Fix': 'pr-bugfix',
'Improvement': 'pr-improvement',
'Performance Improvement': 'pr-performance',
"New Feature": "pr-feature",
"Bug Fix": "pr-bugfix",
"Improvement": "pr-improvement",
"Performance Improvement": "pr-performance",
# 'Backward Incompatible Change': doesn't match anything
'Build/Testing/Packaging Improvement': 'pr-build',
'Non-significant (changelog entry is not needed)': 'pr-non-significant',
'Non-significant (changelog entry is not required)': 'pr-non-significant',
'Non-significant': 'pr-non-significant',
'Documentation (changelog entry is not required)': 'pr-documentation',
"Build/Testing/Packaging Improvement": "pr-build",
"Non-significant (changelog entry is not needed)": "pr-non-significant",
"Non-significant (changelog entry is not required)": "pr-non-significant",
"Non-significant": "pr-non-significant",
"Documentation (changelog entry is not required)": "pr-documentation",
# 'Other': doesn't match anything
}
@ -21,7 +22,7 @@ class Description:
self.label_name = str()
self.legal = False
self._parse(pull_request['bodyText'])
self._parse(pull_request["bodyText"])
def _parse(self, text):
lines = text.splitlines()
@ -38,14 +39,17 @@ class Description:
category = stripped
next_category = False
if stripped == 'I hereby agree to the terms of the CLA available at: https://yandex.ru/legal/cla/?lang=en':
if (
stripped
== "I hereby agree to the terms of the CLA available at: https://yandex.ru/legal/cla/?lang=en"
):
self.legal = True
category_headers = (
'Category (leave one):',
'Changelog category (leave one):',
'Changelog category:',
'Category:'
"Category (leave one):",
"Changelog category (leave one):",
"Changelog category:",
"Category:",
)
if stripped in category_headers:
@ -55,6 +59,6 @@ class Description:
self.label_name = Description.MAP_CATEGORY_TO_LABEL[category]
else:
if not category:
print('Cannot find category in pr description')
print("Cannot find category in pr description")
else:
print(('Unknown category: ' + category))
print(("Unknown category: " + category))

View File

@ -5,11 +5,11 @@ import time
class Query:
'''
"""
Implements queries to the Github API using GraphQL
'''
"""
_PULL_REQUEST = '''
_PULL_REQUEST = """
author {{
... on User {{
id
@ -47,7 +47,7 @@ class Query:
number
title
url
'''
"""
def __init__(self, token, owner, name, team, max_page_size=100, min_page_size=10):
self._PULL_REQUEST = Query._PULL_REQUEST.format(min_page_size=min_page_size)
@ -63,14 +63,14 @@ class Query:
self.api_costs = {}
repo = self.get_repository()
self._id = repo['id']
self.ssh_url = repo['sshUrl']
self.default_branch = repo['defaultBranchRef']['name']
self._id = repo["id"]
self.ssh_url = repo["sshUrl"]
self.default_branch = repo["defaultBranchRef"]["name"]
self.members = set(self.get_members())
def get_repository(self):
_QUERY = '''
_QUERY = """
repository(owner: "{owner}" name: "{name}") {{
defaultBranchRef {{
name
@ -78,19 +78,19 @@ class Query:
id
sshUrl
}}
'''
"""
query = _QUERY.format(owner=self._owner, name=self._name)
return self._run(query)['repository']
return self._run(query)["repository"]
def get_members(self):
'''Get all team members for organization
"""Get all team members for organization
Returns:
members: a map of members' logins to ids
'''
"""
_QUERY = '''
_QUERY = """
organization(login: "{organization}") {{
team(slug: "{team}") {{
members(first: {max_page_size} {next}) {{
@ -105,43 +105,54 @@ class Query:
}}
}}
}}
'''
"""
members = {}
not_end = True
query = _QUERY.format(organization=self._owner, team=self._team,
max_page_size=self._max_page_size,
next='')
query = _QUERY.format(
organization=self._owner,
team=self._team,
max_page_size=self._max_page_size,
next="",
)
while not_end:
result = self._run(query)['organization']['team']
result = self._run(query)["organization"]["team"]
if result is None:
break
result = result['members']
not_end = result['pageInfo']['hasNextPage']
query = _QUERY.format(organization=self._owner, team=self._team,
max_page_size=self._max_page_size,
next='after: "{}"'.format(result["pageInfo"]["endCursor"]))
result = result["members"]
not_end = result["pageInfo"]["hasNextPage"]
query = _QUERY.format(
organization=self._owner,
team=self._team,
max_page_size=self._max_page_size,
next='after: "{}"'.format(result["pageInfo"]["endCursor"]),
)
members += dict([(node['login'], node['id']) for node in result['nodes']])
members += dict([(node["login"], node["id"]) for node in result["nodes"]])
return members
def get_pull_request(self, number):
_QUERY = '''
_QUERY = """
repository(owner: "{owner}" name: "{name}") {{
pullRequest(number: {number}) {{
{pull_request_data}
}}
}}
'''
"""
query = _QUERY.format(owner=self._owner, name=self._name, number=number,
pull_request_data=self._PULL_REQUEST, min_page_size=self._min_page_size)
return self._run(query)['repository']['pullRequest']
query = _QUERY.format(
owner=self._owner,
name=self._name,
number=number,
pull_request_data=self._PULL_REQUEST,
min_page_size=self._min_page_size,
)
return self._run(query)["repository"]["pullRequest"]
def find_pull_request(self, base, head):
_QUERY = '''
_QUERY = """
repository(owner: "{owner}" name: "{name}") {{
pullRequests(first: {min_page_size} baseRefName: "{base}" headRefName: "{head}") {{
nodes {{
@ -150,21 +161,27 @@ class Query:
totalCount
}}
}}
'''
"""
query = _QUERY.format(owner=self._owner, name=self._name, base=base, head=head,
pull_request_data=self._PULL_REQUEST, min_page_size=self._min_page_size)
result = self._run(query)['repository']['pullRequests']
if result['totalCount'] > 0:
return result['nodes'][0]
query = _QUERY.format(
owner=self._owner,
name=self._name,
base=base,
head=head,
pull_request_data=self._PULL_REQUEST,
min_page_size=self._min_page_size,
)
result = self._run(query)["repository"]["pullRequests"]
if result["totalCount"] > 0:
return result["nodes"][0]
else:
return {}
def find_pull_requests(self, label_name):
'''
"""
Get all pull-requests filtered by label name
'''
_QUERY = '''
"""
_QUERY = """
repository(owner: "{owner}" name: "{name}") {{
pullRequests(first: {min_page_size} labels: "{label_name}" states: OPEN) {{
nodes {{
@ -172,18 +189,23 @@ class Query:
}}
}}
}}
'''
"""
query = _QUERY.format(owner=self._owner, name=self._name, label_name=label_name,
pull_request_data=self._PULL_REQUEST, min_page_size=self._min_page_size)
return self._run(query)['repository']['pullRequests']['nodes']
query = _QUERY.format(
owner=self._owner,
name=self._name,
label_name=label_name,
pull_request_data=self._PULL_REQUEST,
min_page_size=self._min_page_size,
)
return self._run(query)["repository"]["pullRequests"]["nodes"]
def get_pull_requests(self, before_commit):
'''
"""
Get all merged pull-requests from the HEAD of the default branch down to the given commit (excluding it)
'''
"""
_QUERY = '''
_QUERY = """
repository(owner: "{owner}" name: "{name}") {{
defaultBranchRef {{
target {{
@ -221,44 +243,60 @@ class Query:
}}
}}
}}
'''
"""
pull_requests = []
not_end = True
query = _QUERY.format(owner=self._owner, name=self._name,
max_page_size=self._max_page_size,
min_page_size=self._min_page_size,
pull_request_data=self._PULL_REQUEST,
next='')
query = _QUERY.format(
owner=self._owner,
name=self._name,
max_page_size=self._max_page_size,
min_page_size=self._min_page_size,
pull_request_data=self._PULL_REQUEST,
next="",
)
while not_end:
result = self._run(query)['repository']['defaultBranchRef']['target']['history']
not_end = result['pageInfo']['hasNextPage']
query = _QUERY.format(owner=self._owner, name=self._name,
max_page_size=self._max_page_size,
min_page_size=self._min_page_size,
pull_request_data=self._PULL_REQUEST,
next='after: "{}"'.format(result["pageInfo"]["endCursor"]))
result = self._run(query)["repository"]["defaultBranchRef"]["target"][
"history"
]
not_end = result["pageInfo"]["hasNextPage"]
query = _QUERY.format(
owner=self._owner,
name=self._name,
max_page_size=self._max_page_size,
min_page_size=self._min_page_size,
pull_request_data=self._PULL_REQUEST,
next='after: "{}"'.format(result["pageInfo"]["endCursor"]),
)
for commit in result['nodes']:
for commit in result["nodes"]:
# FIXME: maybe include `before_commit`?
if str(commit['oid']) == str(before_commit):
if str(commit["oid"]) == str(before_commit):
not_end = False
break
# TODO: fetch all pull-requests that were merged in a single commit.
assert commit['associatedPullRequests']['totalCount'] <= self._min_page_size
assert (
commit["associatedPullRequests"]["totalCount"]
<= self._min_page_size
)
for pull_request in commit['associatedPullRequests']['nodes']:
if(pull_request['baseRepository']['nameWithOwner'] == '{}/{}'.format(self._owner, self._name) and
pull_request['baseRefName'] == self.default_branch and
pull_request['mergeCommit']['oid'] == commit['oid']):
for pull_request in commit["associatedPullRequests"]["nodes"]:
if (
pull_request["baseRepository"]["nameWithOwner"]
== "{}/{}".format(self._owner, self._name)
and pull_request["baseRefName"] == self.default_branch
and pull_request["mergeCommit"]["oid"] == commit["oid"]
):
pull_requests.append(pull_request)
return pull_requests
def create_pull_request(self, source, target, title, description="", draft=False, can_modify=True):
_QUERY = '''
def create_pull_request(
self, source, target, title, description="", draft=False, can_modify=True
):
_QUERY = """
createPullRequest(input: {{
baseRefName: "{target}",
headRefName: "{source}",
@ -272,15 +310,22 @@ class Query:
{pull_request_data}
}}
}}
'''
"""
query = _QUERY.format(target=target, source=source, id=self._id, title=title, body=description,
draft="true" if draft else "false", modify="true" if can_modify else "false",
pull_request_data=self._PULL_REQUEST)
return self._run(query, is_mutation=True)['createPullRequest']['pullRequest']
query = _QUERY.format(
target=target,
source=source,
id=self._id,
title=title,
body=description,
draft="true" if draft else "false",
modify="true" if can_modify else "false",
pull_request_data=self._PULL_REQUEST,
)
return self._run(query, is_mutation=True)["createPullRequest"]["pullRequest"]
def merge_pull_request(self, id):
_QUERY = '''
_QUERY = """
mergePullRequest(input: {{
pullRequestId: "{id}"
}}) {{
@ -288,35 +333,35 @@ class Query:
{pull_request_data}
}}
}}
'''
"""
query = _QUERY.format(id=id, pull_request_data=self._PULL_REQUEST)
return self._run(query, is_mutation=True)['mergePullRequest']['pullRequest']
return self._run(query, is_mutation=True)["mergePullRequest"]["pullRequest"]
# FIXME: figure out how to add more assignees at once
def add_assignee(self, pr, assignee):
_QUERY = '''
_QUERY = """
addAssigneesToAssignable(input: {{
assignableId: "{id1}",
assigneeIds: "{id2}"
}}) {{
clientMutationId
}}
'''
"""
query = _QUERY.format(id1=pr['id'], id2=assignee['id'])
query = _QUERY.format(id1=pr["id"], id2=assignee["id"])
self._run(query, is_mutation=True)
def set_label(self, pull_request, label_name):
'''
"""
Set label by name to the pull request
Args:
pull_request: JSON object returned by `get_pull_requests()`
label_name (string): label name
'''
"""
_GET_LABEL = '''
_GET_LABEL = """
repository(owner: "{owner}" name: "{name}") {{
labels(first: {max_page_size} {next} query: "{label_name}") {{
pageInfo {{
@ -330,36 +375,44 @@ class Query:
}}
}}
}}
'''
"""
_SET_LABEL = '''
_SET_LABEL = """
addLabelsToLabelable(input: {{
labelableId: "{pr_id}",
labelIds: "{label_id}"
}}) {{
clientMutationId
}}
'''
"""
labels = []
not_end = True
query = _GET_LABEL.format(owner=self._owner, name=self._name, label_name=label_name,
max_page_size=self._max_page_size,
next='')
query = _GET_LABEL.format(
owner=self._owner,
name=self._name,
label_name=label_name,
max_page_size=self._max_page_size,
next="",
)
while not_end:
result = self._run(query)['repository']['labels']
not_end = result['pageInfo']['hasNextPage']
query = _GET_LABEL.format(owner=self._owner, name=self._name, label_name=label_name,
max_page_size=self._max_page_size,
next='after: "{}"'.format(result["pageInfo"]["endCursor"]))
result = self._run(query)["repository"]["labels"]
not_end = result["pageInfo"]["hasNextPage"]
query = _GET_LABEL.format(
owner=self._owner,
name=self._name,
label_name=label_name,
max_page_size=self._max_page_size,
next='after: "{}"'.format(result["pageInfo"]["endCursor"]),
)
labels += [label for label in result['nodes']]
labels += [label for label in result["nodes"]]
if not labels:
return
query = _SET_LABEL.format(pr_id=pull_request['id'], label_id=labels[0]['id'])
query = _SET_LABEL.format(pr_id=pull_request["id"], label_id=labels[0]["id"])
self._run(query, is_mutation=True)
def _run(self, query, is_mutation=False):
@ -385,19 +438,21 @@ class Query:
status_forcelist=status_forcelist,
)
adapter = HTTPAdapter(max_retries=retry)
session.mount('http://', adapter)
session.mount('https://', adapter)
session.mount("http://", adapter)
session.mount("https://", adapter)
return session
headers = {'Authorization': 'bearer {}'.format(self._token)}
headers = {"Authorization": "bearer {}".format(self._token)}
if is_mutation:
query = '''
query = """
mutation {{
{query}
}}
'''.format(query=query)
""".format(
query=query
)
else:
query = '''
query = """
query {{
{query}
rateLimit {{
@ -405,23 +460,38 @@ class Query:
remaining
}}
}}
'''.format(query=query)
""".format(
query=query
)
while True:
request = requests_retry_session().post('https://api.github.com/graphql', json={'query': query}, headers=headers)
request = requests_retry_session().post(
"https://api.github.com/graphql", json={"query": query}, headers=headers
)
if request.status_code == 200:
result = request.json()
if 'errors' in result:
raise Exception('Errors occurred: {}\nOriginal query: {}'.format(result["errors"], query))
if "errors" in result:
raise Exception(
"Errors occurred: {}\nOriginal query: {}".format(
result["errors"], query
)
)
if not is_mutation:
import inspect
caller = inspect.getouterframes(inspect.currentframe(), 2)[1][3]
if caller not in list(self.api_costs.keys()):
self.api_costs[caller] = 0
self.api_costs[caller] += result['data']['rateLimit']['cost']
self.api_costs[caller] += result["data"]["rateLimit"]["cost"]
return result['data']
return result["data"]
else:
import json
raise Exception('Query failed with code {code}:\n{json}'.format(code=request.status_code, json=json.dumps(request.json(), indent=4)))
raise Exception(
"Query failed with code {code}:\n{json}".format(
code=request.status_code,
json=json.dumps(request.json(), indent=4),
)
)
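To make the request shape concrete, here is a minimal sketch of what _run() sends for a non-mutation query; the token is a placeholder and the inner query is abbreviated rather than using the full _PULL_REQUEST fragment.
import requests

token = "<github token>"  # placeholder
inner = 'repository(owner: "ClickHouse" name: "ClickHouse") { id sshUrl }'
query = """
query {{
{query}
rateLimit {{
cost
remaining
}}
}}
""".format(
    query=inner
)
headers = {"Authorization": "bearer {}".format(token)}
response = requests.post(
    "https://api.github.com/graphql", json={"query": query}, headers=headers
)
print(response.status_code, response.json().get("data"))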

View File

@ -6,6 +6,7 @@ import json
import requests # type: ignore
from get_robot_token import get_parameter_from_ssm
class ClickHouseHelper:
def __init__(self, url=None, user=None, password=None):
self.url2 = None
@ -15,27 +16,35 @@ class ClickHouseHelper:
url = get_parameter_from_ssm("clickhouse-test-stat-url")
self.url2 = get_parameter_from_ssm("clickhouse-test-stat-url2")
self.auth2 = {
'X-ClickHouse-User': get_parameter_from_ssm("clickhouse-test-stat-login2"),
'X-ClickHouse-Key': ''
"X-ClickHouse-User": get_parameter_from_ssm(
"clickhouse-test-stat-login2"
),
"X-ClickHouse-Key": "",
}
self.url = url
self.auth = {
'X-ClickHouse-User': user if user is not None else get_parameter_from_ssm("clickhouse-test-stat-login"),
'X-ClickHouse-Key': password if password is not None else get_parameter_from_ssm("clickhouse-test-stat-password")
"X-ClickHouse-User": user
if user is not None
else get_parameter_from_ssm("clickhouse-test-stat-login"),
"X-ClickHouse-Key": password
if password is not None
else get_parameter_from_ssm("clickhouse-test-stat-password"),
}
@staticmethod
def _insert_json_str_info_impl(url, auth, db, table, json_str):
params = {
'database': db,
'query': 'INSERT INTO {table} FORMAT JSONEachRow'.format(table=table),
'date_time_input_format': 'best_effort',
'send_logs_level': 'warning',
"database": db,
"query": "INSERT INTO {table} FORMAT JSONEachRow".format(table=table),
"date_time_input_format": "best_effort",
"send_logs_level": "warning",
}
for i in range(5):
response = requests.post(url, params=params, data=json_str, headers=auth, verify=False)
response = requests.post(
url, params=params, data=json_str, headers=auth, verify=False
)
logging.info("Response content '%s'", response.content)
@ -43,16 +52,25 @@ class ClickHouseHelper:
break
error = (
"Cannot insert data into clickhouse at try " + str(i)
+ ": HTTP code " + str(response.status_code) + ": '"
+ str(response.text) + "'")
"Cannot insert data into clickhouse at try "
+ str(i)
+ ": HTTP code "
+ str(response.status_code)
+ ": '"
+ str(response.text)
+ "'"
)
if response.status_code >= 500:
# A retriable error
time.sleep(1)
continue
logging.info("Request headers '%s', body '%s'", response.request.headers, response.request.body)
logging.info(
"Request headers '%s', body '%s'",
response.request.headers,
response.request.body,
)
raise Exception(error)
else:
@ -72,18 +90,20 @@ class ClickHouseHelper:
for event in events:
jsons.append(json.dumps(event))
self._insert_json_str_info(db, table, ','.join(jsons))
self._insert_json_str_info(db, table, ",".join(jsons))
def _select_and_get_json_each_row(self, db, query):
params = {
'database': db,
'query': query,
'default_format': 'JSONEachRow',
"database": db,
"query": query,
"default_format": "JSONEachRow",
}
for i in range(5):
response = None
try:
response = requests.get(self.url, params=params, headers=self.auth, verify=False)
response = requests.get(
self.url, params=params, headers=self.auth, verify=False
)
response.raise_for_status()
return response.text
except Exception as ex:
@ -97,15 +117,21 @@ class ClickHouseHelper:
def select_json_each_row(self, db, query):
text = self._select_and_get_json_each_row(db, query)
result = []
for line in text.split('\n'):
for line in text.split("\n"):
if line:
result.append(json.loads(line))
return result
def prepare_tests_results_for_clickhouse(
pr_info, test_results,
check_status, check_duration, check_start_time,
report_url, check_name):
pr_info,
test_results,
check_status,
check_duration,
check_start_time,
report_url,
check_name,
):
pull_request_url = "https://github.com/ClickHouse/ClickHouse/commits/master"
base_ref = "master"
@ -147,13 +173,14 @@ def prepare_tests_results_for_clickhouse(
test_time = 0
if len(test_result) > 2 and test_result[2]:
test_time = test_result[2]
current_row['test_duration_ms'] = int(float(test_time) * 1000)
current_row['test_name'] = test_name
current_row['test_status'] = test_status
current_row["test_duration_ms"] = int(float(test_time) * 1000)
current_row["test_name"] = test_name
current_row["test_status"] = test_status
result.append(current_row)
return result
def mark_flaky_tests(clickhouse_helper, check_name, test_results):
try:
query = """
@ -164,14 +191,16 @@ def mark_flaky_tests(clickhouse_helper, check_name, test_results):
AND check_name = '{check_name}'
AND (test_status = 'FAIL' OR test_status = 'FLAKY')
AND pull_request_number = 0
""".format(check_name=check_name)
""".format(
check_name=check_name
)
tests_data = clickhouse_helper.select_json_each_row('gh-data', query)
master_failed_tests = {row['test_name'] for row in tests_data}
logging.info("Found flaky tests: %s", ', '.join(master_failed_tests))
tests_data = clickhouse_helper.select_json_each_row("gh-data", query)
master_failed_tests = {row["test_name"] for row in tests_data}
logging.info("Found flaky tests: %s", ", ".join(master_failed_tests))
for test_result in test_results:
if test_result[1] == 'FAIL' and test_result[0] in master_failed_tests:
test_result[1] = 'FLAKY'
if test_result[1] == "FAIL" and test_result[0] in master_failed_tests:
test_result[1] = "FLAKY"
except Exception as ex:
logging.info("Exception happened during flaky tests fetch %s", ex)

View File

@ -18,13 +18,16 @@ from tee_popen import TeePopen
NAME = "Woboq Build (actions)"
def get_run_command(repo_path, output_path, image):
cmd = "docker run " + \
f"--volume={repo_path}:/repo_folder " \
f"--volume={output_path}:/test_output " \
f"-e 'DATA=https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/data' {image}"
cmd = (
"docker run " + f"--volume={repo_path}:/repo_folder "
f"--volume={output_path}:/test_output "
f"-e 'DATA=https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/data' {image}"
)
return cmd
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
@ -37,8 +40,8 @@ if __name__ == "__main__":
if not os.path.exists(temp_path):
os.makedirs(temp_path)
docker_image = get_image_with_version(IMAGES_PATH, 'clickhouse/codebrowser')
s3_helper = S3Helper('https://s3.amazonaws.com')
docker_image = get_image_with_version(IMAGES_PATH, "clickhouse/codebrowser")
s3_helper = S3Helper("https://s3.amazonaws.com")
result_path = os.path.join(temp_path, "result_path")
if not os.path.exists(result_path):
@ -62,14 +65,20 @@ if __name__ == "__main__":
report_path = os.path.join(result_path, "html_report")
logging.info("Report path %s", report_path)
s3_path_prefix = "codebrowser"
html_urls = s3_helper.fast_parallel_upload_dir(report_path, s3_path_prefix, 'clickhouse-test-reports')
html_urls = s3_helper.fast_parallel_upload_dir(
report_path, s3_path_prefix, "clickhouse-test-reports"
)
index_html = '<a href="https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/index.html">HTML report</a>'
test_results = [(index_html, "Look at the report")]
report_url = upload_results(s3_helper, 0, os.getenv("GITHUB_SHA"), test_results, [], NAME)
report_url = upload_results(
s3_helper, 0, os.getenv("GITHUB_SHA"), test_results, [], NAME
)
print(f"::notice ::Report url: {report_url}")
post_commit_status(gh, os.getenv("GITHUB_SHA"), NAME, "Report built", "success", report_url)
post_commit_status(
gh, os.getenv("GITHUB_SHA"), NAME, "Report built", "success", report_url
)

View File

@ -14,9 +14,9 @@ def override_status(status, check_name, invert=False):
return "success"
if invert:
if status == 'success':
return 'error'
return 'success'
if status == "success":
return "error"
return "success"
return status
@ -56,6 +56,6 @@ def post_commit_status(gh, sha, check_name, description, state, report_url):
def post_commit_status_to_file(file_path, description, state, report_url):
if os.path.exists(file_path):
raise Exception(f'File "{file_path}" already exists!')
with open(file_path, 'w', encoding='utf-8') as f:
out = csv.writer(f, delimiter='\t')
with open(file_path, "w", encoding="utf-8") as f:
out = csv.writer(f, delimiter="\t")
out.writerow([state, report_url, description])

View File

@ -16,34 +16,40 @@ from build_download_helper import download_builds_filter
from upload_result_helper import upload_results
from docker_pull_helper import get_images_with_versions
from commit_status_helper import post_commit_status
from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse
from clickhouse_helper import (
ClickHouseHelper,
mark_flaky_tests,
prepare_tests_results_for_clickhouse,
)
from stopwatch import Stopwatch
from rerun_helper import RerunHelper
IMAGE_UBUNTU = "clickhouse/test-old-ubuntu"
IMAGE_CENTOS = "clickhouse/test-old-centos"
MAX_GLIBC_VERSION = '2.4'
MAX_GLIBC_VERSION = "2.4"
DOWNLOAD_RETRIES_COUNT = 5
CHECK_NAME = "Compatibility check (actions)"
def process_os_check(log_path):
name = os.path.basename(log_path)
with open(log_path, 'r') as log:
line = log.read().split('\n')[0].strip()
if line != 'OK':
with open(log_path, "r") as log:
line = log.read().split("\n")[0].strip()
if line != "OK":
return (name, "FAIL")
else:
return (name, "OK")
def process_glibc_check(log_path):
bad_lines = []
with open(log_path, 'r') as log:
with open(log_path, "r") as log:
for line in log:
if line.strip():
columns = line.strip().split(' ')
columns = line.strip().split(" ")
symbol_with_glibc = columns[-2] # sysconf@GLIBC_2.2.5
_, version = symbol_with_glibc.split('@GLIBC_')
if version == 'PRIVATE':
_, version = symbol_with_glibc.split("@GLIBC_")
if version == "PRIVATE":
bad_lines.append((symbol_with_glibc, "FAIL"))
elif StrictVersion(version) > MAX_GLIBC_VERSION:
bad_lines.append((symbol_with_glibc, "FAIL"))
@ -51,6 +57,7 @@ def process_glibc_check(log_path):
bad_lines.append(("glibc check", "OK"))
return bad_lines
def process_result(result_folder, server_log_folder):
summary = process_glibc_check(os.path.join(result_folder, "glibc.log"))
@ -86,16 +93,18 @@ def process_result(result_folder, server_log_folder):
return status, description, summary, result_logs
def get_run_commands(build_path, result_folder, server_log_folder, image_centos, image_ubuntu):
def get_run_commands(
build_path, result_folder, server_log_folder, image_centos, image_ubuntu
):
return [
f"readelf -s {build_path}/usr/bin/clickhouse | grep '@GLIBC_' > {result_folder}/glibc.log",
f"readelf -s {build_path}/usr/bin/clickhouse-odbc-bridge | grep '@GLIBC_' >> {result_folder}/glibc.log",
f"docker run --network=host --volume={build_path}/usr/bin/clickhouse:/clickhouse " \
f"--volume={build_path}/etc/clickhouse-server:/config " \
f"--volume={server_log_folder}:/var/log/clickhouse-server {image_ubuntu} > {result_folder}/ubuntu:12.04",
f"docker run --network=host --volume={build_path}/usr/bin/clickhouse:/clickhouse " \
f"--volume={build_path}/etc/clickhouse-server:/config " \
f"--volume={server_log_folder}:/var/log/clickhouse-server {image_centos} > {result_folder}/centos:5",
f"docker run --network=host --volume={build_path}/usr/bin/clickhouse:/clickhouse "
f"--volume={build_path}/etc/clickhouse-server:/config "
f"--volume={server_log_folder}:/var/log/clickhouse-server {image_ubuntu} > {result_folder}/ubuntu:12.04",
f"docker run --network=host --volume={build_path}/usr/bin/clickhouse:/clickhouse "
f"--volume={build_path}/etc/clickhouse-server:/config "
f"--volume={server_log_folder}:/var/log/clickhouse-server {image_centos} > {result_folder}/centos:5",
]
@ -124,14 +133,18 @@ if __name__ == "__main__":
os.makedirs(packages_path)
def url_filter(url):
return url.endswith('.deb') and ('clickhouse-common-static_' in url or 'clickhouse-server_' in url)
return url.endswith(".deb") and (
"clickhouse-common-static_" in url or "clickhouse-server_" in url
)
download_builds_filter(CHECK_NAME, reports_path, packages_path, url_filter)
for f in os.listdir(packages_path):
if '.deb' in f:
if ".deb" in f:
full_path = os.path.join(packages_path, f)
subprocess.check_call(f"dpkg -x {full_path} {packages_path} && rm {full_path}", shell=True)
subprocess.check_call(
f"dpkg -x {full_path} {packages_path} && rm {full_path}", shell=True
)
server_log_path = os.path.join(temp_path, "server_log")
if not os.path.exists(server_log_path):
@ -141,7 +154,9 @@ if __name__ == "__main__":
if not os.path.exists(result_path):
os.makedirs(result_path)
run_commands = get_run_commands(packages_path, result_path, server_log_path, docker_images[0], docker_images[1])
run_commands = get_run_commands(
packages_path, result_path, server_log_path, docker_images[0], docker_images[1]
)
state = "success"
for run_command in run_commands:
@ -154,15 +169,32 @@ if __name__ == "__main__":
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
s3_helper = S3Helper('https://s3.amazonaws.com')
state, description, test_results, additional_logs = process_result(result_path, server_log_path)
s3_helper = S3Helper("https://s3.amazonaws.com")
state, description, test_results, additional_logs = process_result(
result_path, server_log_path
)
ch_helper = ClickHouseHelper()
mark_flaky_tests(ch_helper, CHECK_NAME, test_results)
report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_logs, CHECK_NAME)
report_url = upload_results(
s3_helper,
pr_info.number,
pr_info.sha,
test_results,
additional_logs,
CHECK_NAME,
)
print(f"::notice ::Report url: {report_url}")
post_commit_status(gh, pr_info.sha, CHECK_NAME, description, state, report_url)
prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, CHECK_NAME)
prepared_events = prepare_tests_results_for_clickhouse(
pr_info,
test_results,
state,
stopwatch.duration_seconds,
stopwatch.start_time_str,
report_url,
CHECK_NAME,
)
ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
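A small illustration of the glibc version gate inside process_glibc_check() above; the symbol is a made-up readelf example, and StrictVersion is assumed to be distutils.version.StrictVersion as imported elsewhere in this script.
from distutils.version import StrictVersion

MAX_GLIBC_VERSION = "2.4"
symbol_with_glibc = "pthread_create@GLIBC_2.2.5"  # example line from `readelf -s ... | grep '@GLIBC_'`
_, version = symbol_with_glibc.split("@GLIBC_")
print(StrictVersion(version) > StrictVersion(MAX_GLIBC_VERSION))  # False -> the symbol is acceptable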

View File

@ -3,20 +3,21 @@ import subprocess
import logging
import os
def compress_file_fast(path, archive_path):
if os.path.exists('/usr/bin/pigz'):
if os.path.exists("/usr/bin/pigz"):
subprocess.check_call("pigz < {} > {}".format(path, archive_path), shell=True)
else:
subprocess.check_call("gzip < {} > {}".format(path, archive_path), shell=True)
def compress_fast(path, archive_path, exclude=None):
pigz_part = ''
if os.path.exists('/usr/bin/pigz'):
pigz_part = ""
if os.path.exists("/usr/bin/pigz"):
logging.info("pigz found, will compress and decompress faster")
pigz_part = "--use-compress-program='pigz'"
else:
pigz_part = '-z'
pigz_part = "-z"
logging.info("no pigz, compressing with default tar")
if exclude is None:
@ -31,21 +32,36 @@ def compress_fast(path, archive_path, exclude=None):
path = os.path.dirname(path)
else:
path += "/.."
cmd = "tar {} {} -cf {} -C {} {}".format(pigz_part, exclude_part, archive_path, path, fname)
cmd = "tar {} {} -cf {} -C {} {}".format(
pigz_part, exclude_part, archive_path, path, fname
)
logging.debug("compress_fast cmd: %s", cmd)
subprocess.check_call(cmd, shell=True)
def decompress_fast(archive_path, result_path=None):
pigz_part = ''
if os.path.exists('/usr/bin/pigz'):
logging.info("pigz found, will compress and decompress faster ('%s' -> '%s')", archive_path, result_path)
pigz_part = ""
if os.path.exists("/usr/bin/pigz"):
logging.info(
"pigz found, will compress and decompress faster ('%s' -> '%s')",
archive_path,
result_path,
)
pigz_part = "--use-compress-program='pigz'"
else:
pigz_part = '-z'
logging.info("no pigz, decompressing with default tar ('%s' -> '%s')", archive_path, result_path)
pigz_part = "-z"
logging.info(
"no pigz, decompressing with default tar ('%s' -> '%s')",
archive_path,
result_path,
)
if result_path is None:
subprocess.check_call("tar {} -xf {}".format(pigz_part, archive_path), shell=True)
subprocess.check_call(
"tar {} -xf {}".format(pigz_part, archive_path), shell=True
)
else:
subprocess.check_call("tar {} -xf {} -C {}".format(pigz_part, archive_path, result_path), shell=True)
subprocess.check_call(
"tar {} -xf {} -C {}".format(pigz_part, archive_path, result_path),
shell=True,
)
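A hypothetical usage of the helpers above; the module name and paths are assumptions.
from compress_files import compress_fast, decompress_fast  # assumed module name

compress_fast("/tmp/ci_logs", "/tmp/ci_logs.tar.gz")  # uses pigz when /usr/bin/pigz exists
decompress_fast("/tmp/ci_logs.tar.gz", "/tmp/ci_logs_copy")  # same pigz / -z selection on the way back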

View File

@ -8,23 +8,27 @@ import logging
from typing import Optional
class DockerImage:
def __init__(self, name, version : Optional[str] = None):
def __init__(self, name, version: Optional[str] = None):
self.name = name
if version is None:
self.version = 'latest'
self.version = "latest"
else:
self.version = version
def __str__(self):
return f"{self.name}:{self.version}"
def get_images_with_versions(reports_path, required_image, pull=True, version : Optional[str] = None):
def get_images_with_versions(
reports_path, required_image, pull=True, version: Optional[str] = None
):
images_path = None
for root, _, files in os.walk(reports_path):
for f in files:
if f == 'changed_images.json':
images_path = os.path.join(root, 'changed_images.json')
if f == "changed_images.json":
images_path = os.path.join(root, "changed_images.json")
break
if not images_path:
@ -34,7 +38,7 @@ def get_images_with_versions(reports_path, required_image, pull=True, version :
if images_path is not None and os.path.exists(images_path):
logging.info("Images file exists")
with open(images_path, 'r', encoding='utf-8') as images_fd:
with open(images_path, "r", encoding="utf-8") as images_fd:
images = json.load(images_fd)
logging.info("Got images %s", images)
else:
@ -52,15 +56,22 @@ def get_images_with_versions(reports_path, required_image, pull=True, version :
for i in range(10):
try:
logging.info("Pulling image %s", docker_image)
latest_error = subprocess.check_output(f"docker pull {docker_image}", stderr=subprocess.STDOUT, shell=True)
latest_error = subprocess.check_output(
f"docker pull {docker_image}",
stderr=subprocess.STDOUT,
shell=True,
)
break
except Exception as ex:
time.sleep(i * 3)
logging.info("Got execption pulling docker %s", ex)
else:
raise Exception(f"Cannot pull dockerhub for image docker pull {docker_image} because of {latest_error}")
raise Exception(
f"Cannot pull dockerhub for image docker pull {docker_image} because of {latest_error}"
)
return docker_images
def get_image_with_version(reports_path, image, pull=True, version=None):
return get_images_with_versions(reports_path, [image], pull, version=version)[0]
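A hypothetical call, assuming a reports directory that contains a changed_images.json produced by the docker-images workflow:
docker_image = get_image_with_version("/tmp/reports", "clickhouse/docs-check")
print(docker_image)  # clickhouse/docs-check:latest, or the tag pinned in changed_images.json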

View File

@ -40,7 +40,9 @@ if __name__ == "__main__":
if not pr_info.has_changes_in_documentation():
logging.info("No changes in documentation")
commit = get_commit(gh, pr_info.sha)
commit.create_status(context=NAME, description="No changes in docs", state="success")
commit.create_status(
context=NAME, description="No changes in docs", state="success"
)
sys.exit(0)
logging.info("Has changes in docs")
@ -48,15 +50,15 @@ if __name__ == "__main__":
if not os.path.exists(temp_path):
os.makedirs(temp_path)
docker_image = get_image_with_version(temp_path, 'clickhouse/docs-check')
docker_image = get_image_with_version(temp_path, "clickhouse/docs-check")
test_output = os.path.join(temp_path, 'docs_check_log')
test_output = os.path.join(temp_path, "docs_check_log")
if not os.path.exists(test_output):
os.makedirs(test_output)
cmd = f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/repo_path --volume={test_output}:/output_path {docker_image}"
run_log_path = os.path.join(test_output, 'runlog.log')
run_log_path = os.path.join(test_output, "runlog.log")
logging.info("Running command: '%s'", cmd)
with TeePopen(cmd, run_log_path) as process:
@ -82,10 +84,10 @@ if __name__ == "__main__":
for f in files:
path = os.path.join(test_output, f)
additional_files.append(path)
with open(path, 'r', encoding='utf-8') as check_file:
with open(path, "r", encoding="utf-8") as check_file:
for line in check_file:
if "ERROR" in line:
lines.append((line.split(':')[-1], "FAIL"))
lines.append((line.split(":")[-1], "FAIL"))
if lines:
status = "failure"
description = "Found errors in docs"
@ -94,12 +96,22 @@ if __name__ == "__main__":
else:
lines.append(("Non zero exit code", "FAIL"))
s3_helper = S3Helper('https://s3.amazonaws.com')
s3_helper = S3Helper("https://s3.amazonaws.com")
ch_helper = ClickHouseHelper()
report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, lines, additional_files, NAME)
report_url = upload_results(
s3_helper, pr_info.number, pr_info.sha, lines, additional_files, NAME
)
print("::notice ::Report url: {report_url}")
post_commit_status(gh, pr_info.sha, NAME, description, status, report_url)
prepared_events = prepare_tests_results_for_clickhouse(pr_info, lines, status, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, NAME)
prepared_events = prepare_tests_results_for_clickhouse(
pr_info,
lines,
status,
stopwatch.duration_seconds,
stopwatch.start_time_str,
report_url,
NAME,
)
ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)

View File

@ -34,19 +34,23 @@ if __name__ == "__main__":
if not os.path.exists(temp_path):
os.makedirs(temp_path)
docker_image = get_image_with_version(temp_path, 'clickhouse/docs-release')
docker_image = get_image_with_version(temp_path, "clickhouse/docs-release")
test_output = os.path.join(temp_path, 'docs_release_log')
test_output = os.path.join(temp_path, "docs_release_log")
if not os.path.exists(test_output):
os.makedirs(test_output)
token = CLOUDFLARE_TOKEN
cmd = "docker run --cap-add=SYS_PTRACE --volume=$SSH_AUTH_SOCK:/ssh-agent -e SSH_AUTH_SOCK=/ssh-agent " \
f"-e CLOUDFLARE_TOKEN={token} --volume={repo_path}:/repo_path --volume={test_output}:/output_path {docker_image}"
cmd = (
"docker run --cap-add=SYS_PTRACE --volume=$SSH_AUTH_SOCK:/ssh-agent -e SSH_AUTH_SOCK=/ssh-agent "
f"-e CLOUDFLARE_TOKEN={token} --volume={repo_path}:/repo_path --volume={test_output}:/output_path {docker_image}"
)
run_log_path = os.path.join(test_output, 'runlog.log')
run_log_path = os.path.join(test_output, "runlog.log")
with open(run_log_path, 'w', encoding='utf-8') as log, SSHKey("ROBOT_CLICKHOUSE_SSH_KEY"):
with open(run_log_path, "w", encoding="utf-8") as log, SSHKey(
"ROBOT_CLICKHOUSE_SSH_KEY"
):
with subprocess.Popen(cmd, shell=True, stderr=log, stdout=log) as process:
retcode = process.wait()
if retcode == 0:
@ -70,10 +74,10 @@ if __name__ == "__main__":
for f in files:
path = os.path.join(test_output, f)
additional_files.append(path)
with open(path, 'r', encoding='utf-8') as check_file:
with open(path, "r", encoding="utf-8") as check_file:
for line in check_file:
if "ERROR" in line:
lines.append((line.split(':')[-1], "FAIL"))
lines.append((line.split(":")[-1], "FAIL"))
if lines:
status = "failure"
description = "Found errors in docs"
@ -82,9 +86,13 @@ if __name__ == "__main__":
else:
lines.append(("Non zero exit code", "FAIL"))
s3_helper = S3Helper('https://s3.amazonaws.com')
s3_helper = S3Helper("https://s3.amazonaws.com")
report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, lines, additional_files, NAME)
report_url = upload_results(
s3_helper, pr_info.number, pr_info.sha, lines, additional_files, NAME
)
print("::notice ::Report url: {report_url}")
commit = get_commit(gh, pr_info.sha)
commit.create_status(context=NAME, description=description, state=status, target_url=report_url)
commit.create_status(
context=NAME, description=description, state=status, target_url=report_url
)

View File

@ -22,7 +22,9 @@ CLICKHOUSE_CLIENT_DOWNLOAD_URL = "https://github.com/ClickHouse/ClickHouse/relea
CLICKHOUSE_COMMON_STATIC_PACKET_NAME = "clickhouse-common-static_{version}_amd64.deb"
CLICKHOUSE_COMMON_STATIC_DBG_PACKET_NAME = "clickhouse-common-static-dbg_{version}_amd64.deb"
CLICKHOUSE_COMMON_STATIC_DBG_PACKET_NAME = (
"clickhouse-common-static-dbg_{version}_amd64.deb"
)
CLICKHOUSE_SERVER_PACKET_NAME = "clickhouse-server_{version}_all.deb"
CLICKHOUSE_CLIENT_PACKET_NAME = "clickhouse-client_{version}_all.deb"
@ -35,7 +37,9 @@ class Version:
self.version = version
def __lt__(self, other):
return list(map(int, self.version.split('.'))) < list(map(int, other.version.split('.')))
return list(map(int, self.version.split("."))) < list(
map(int, other.version.split("."))
)
def __str__(self):
return self.version
@ -49,6 +53,7 @@ class ReleaseInfo:
def __repr__(self):
return f"ReleaseInfo: {self.version}-{self.type}"
def find_previous_release(server_version, releases):
releases.sort(key=lambda x: x.version, reverse=True)
@ -66,15 +71,26 @@ def get_previous_release(server_version=None):
page = 1
found = False
while not found:
response = requests.get(CLICKHOUSE_TAGS_URL, {'page': page, 'per_page': 100})
response = requests.get(CLICKHOUSE_TAGS_URL, {"page": page, "per_page": 100})
if not response.ok:
raise Exception('Cannot load the list of tags from github: ' + response.reason)
raise Exception(
"Cannot load the list of tags from github: " + response.reason
)
releases_str = set(re.findall(VERSION_PATTERN, response.text))
if len(releases_str) == 0:
raise Exception('Cannot find previous release for ' + str(server_version) + ' server version')
raise Exception(
"Cannot find previous release for "
+ str(server_version)
+ " server version"
)
releases = list(map(lambda x: ReleaseInfo(Version(x.split('-')[0]), x.split('-')[1]), releases_str))
releases = list(
map(
lambda x: ReleaseInfo(Version(x.split("-")[0]), x.split("-")[1]),
releases_str,
)
)
found, previous_release = find_previous_release(server_version, releases)
page += 1
@ -87,34 +103,53 @@ def download_packet(url, out_path):
"""
response = requests.get(url)
logging.info('Downloading %s', url)
logging.info("Downloading %s", url)
if response.ok:
open(out_path, 'wb').write(response.content)
open(out_path, "wb").write(response.content)
def download_packets(release, dest_path=PACKETS_DIR):
if not os.path.exists(dest_path):
os.makedirs(dest_path)
logging.info('Will download %s', release)
logging.info("Will download %s", release)
download_packet(
CLICKHOUSE_COMMON_STATIC_DOWNLOAD_URL.format(version=release.version, type=release.type),
out_path=os.path.join(dest_path, CLICKHOUSE_COMMON_STATIC_PACKET_NAME.format(version=release.version)),
CLICKHOUSE_COMMON_STATIC_DOWNLOAD_URL.format(
version=release.version, type=release.type
),
out_path=os.path.join(
dest_path,
CLICKHOUSE_COMMON_STATIC_PACKET_NAME.format(version=release.version),
),
)
download_packet(
CLICKHOUSE_COMMON_STATIC_DBG_DOWNLOAD_URL.format(version=release.version, type=release.type),
out_path=os.path.join(dest_path, CLICKHOUSE_COMMON_STATIC_DBG_PACKET_NAME.format(version=release.version)),
CLICKHOUSE_COMMON_STATIC_DBG_DOWNLOAD_URL.format(
version=release.version, type=release.type
),
out_path=os.path.join(
dest_path,
CLICKHOUSE_COMMON_STATIC_DBG_PACKET_NAME.format(version=release.version),
),
)
download_packet(
CLICKHOUSE_SERVER_DOWNLOAD_URL.format(version=release.version, type=release.type),
out_path=os.path.join(dest_path, CLICKHOUSE_SERVER_PACKET_NAME.format(version=release.version)),
CLICKHOUSE_SERVER_DOWNLOAD_URL.format(
version=release.version, type=release.type
),
out_path=os.path.join(
dest_path, CLICKHOUSE_SERVER_PACKET_NAME.format(version=release.version)
),
)
download_packet(
CLICKHOUSE_CLIENT_DOWNLOAD_URL.format(version=release.version, type=release.type),
out_path=os.path.join(dest_path, CLICKHOUSE_CLIENT_PACKET_NAME.format(version=release.version)),
CLICKHOUSE_CLIENT_DOWNLOAD_URL.format(
version=release.version, type=release.type
),
out_path=os.path.join(
dest_path, CLICKHOUSE_CLIENT_PACKET_NAME.format(version=release.version)
),
)
@ -123,7 +158,7 @@ def download_previous_release(dest_path):
download_packets(current_release, dest_path=dest_path)
if __name__ == '__main__':
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
server_version = Version(input())
previous_release = get_previous_release(server_version)
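The `Version` class above compares dotted versions by converting each component to an integer, and `find_previous_release` walks the tags from newest to oldest. A self-contained sketch of that comparison logic; the sample tag values are invented for illustration.

# Self-contained sketch of the tuple-of-ints version comparison; sample tags are made up.
class Version:
    def __init__(self, version):
        self.version = version

    def __lt__(self, other):
        return list(map(int, self.version.split("."))) < list(
            map(int, other.version.split("."))
        )

    def __str__(self):
        return self.version


def find_previous(server_version, candidates):
    # Newest first, then take the first version strictly older than the server.
    candidates.sort(reverse=True)
    for candidate in candidates:
        if candidate < server_version:
            return candidate
    return None


if __name__ == "__main__":
    tags = [Version(v) for v in ("22.3.2", "22.2.3", "21.12.4")]
    print(find_previous(Version("22.3.1"), tags))  # -> 22.2.3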

View File

@ -7,7 +7,7 @@ from pr_info import PRInfo
from get_robot_token import get_best_robot_token
from commit_status_helper import get_commit
NAME = 'Run Check (actions)'
NAME = "Run Check (actions)"
def filter_statuses(statuses):
@ -36,4 +36,9 @@ if __name__ == "__main__":
url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}"
statuses = filter_statuses(list(commit.get_statuses()))
if NAME in statuses and statuses[NAME].state == "pending":
commit.create_status(context=NAME, description="All checks finished", state="success", target_url=url)
commit.create_status(
context=NAME,
description="All checks finished",
state="success",
target_url=url,
)
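The script above squashes a commit's statuses to the latest one per context before deciding whether to close the pending check. A small sketch of that squashing step using plain tuples instead of PyGithub objects; the sample data is invented.

# Sketch of "keep only the newest status per context", with invented sample data.
from collections import namedtuple
from datetime import datetime

Status = namedtuple("Status", ["context", "state", "updated_at"])


def filter_statuses(statuses):
    latest = {}
    for status in statuses:
        current = latest.get(status.context)
        if current is None or status.updated_at > current.updated_at:
            latest[status.context] = status
    return latest


if __name__ == "__main__":
    statuses = [
        Status("Run Check (actions)", "pending", datetime(2022, 3, 22, 10, 0)),
        Status("Run Check (actions)", "pending", datetime(2022, 3, 22, 12, 0)),
        Status("Style Check (actions)", "success", datetime(2022, 3, 22, 11, 0)),
    ]
    squashed = filter_statuses(statuses)
    print(squashed["Run Check (actions)"].state)  # -> pending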

View File

@ -17,26 +17,35 @@ from build_download_helper import download_all_deb_packages
from download_previous_release import download_previous_release
from upload_result_helper import upload_results
from docker_pull_helper import get_image_with_version
from commit_status_helper import post_commit_status, get_commit, override_status, post_commit_status_to_file
from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse
from commit_status_helper import (
post_commit_status,
get_commit,
override_status,
post_commit_status_to_file,
)
from clickhouse_helper import (
ClickHouseHelper,
mark_flaky_tests,
prepare_tests_results_for_clickhouse,
)
from stopwatch import Stopwatch
from rerun_helper import RerunHelper
from tee_popen import TeePopen
NO_CHANGES_MSG = 'Nothing to run'
NO_CHANGES_MSG = "Nothing to run"
def get_additional_envs(check_name, run_by_hash_num, run_by_hash_total):
result = []
if 'DatabaseReplicated' in check_name:
if "DatabaseReplicated" in check_name:
result.append("USE_DATABASE_REPLICATED=1")
if 'DatabaseOrdinary' in check_name:
if "DatabaseOrdinary" in check_name:
result.append("USE_DATABASE_ORDINARY=1")
if 'wide parts enabled' in check_name:
if "wide parts enabled" in check_name:
result.append("USE_POLYMORPHIC_PARTS=1")
#temporary
if 's3 storage' in check_name:
# temporary
if "s3 storage" in check_name:
result.append("USE_S3_STORAGE_FOR_MERGE_TREE=1")
if run_by_hash_total != 0:
@ -45,37 +54,55 @@ def get_additional_envs(check_name, run_by_hash_num, run_by_hash_total):
return result
def get_image_name(check_name):
if 'stateless' in check_name.lower():
return 'clickhouse/stateless-test'
if 'stateful' in check_name.lower():
return 'clickhouse/stateful-test'
if "stateless" in check_name.lower():
return "clickhouse/stateless-test"
if "stateful" in check_name.lower():
return "clickhouse/stateful-test"
else:
raise Exception(f"Cannot deduce image name based on check name {check_name}")
def get_run_command(builds_path, repo_tests_path, result_path, server_log_path, kill_timeout, additional_envs, image, flaky_check, tests_to_run):
additional_options = ['--hung-check']
additional_options.append('--print-time')
def get_run_command(
builds_path,
repo_tests_path,
result_path,
server_log_path,
kill_timeout,
additional_envs,
image,
flaky_check,
tests_to_run,
):
additional_options = ["--hung-check"]
additional_options.append("--print-time")
if tests_to_run:
additional_options += tests_to_run
additional_options_str = '-e ADDITIONAL_OPTIONS="' + ' '.join(additional_options) + '"'
additional_options_str = (
'-e ADDITIONAL_OPTIONS="' + " ".join(additional_options) + '"'
)
envs = [f'-e MAX_RUN_TIME={int(0.9 * kill_timeout)}', '-e S3_URL="https://clickhouse-datasets.s3.amazonaws.com"']
envs = [
f"-e MAX_RUN_TIME={int(0.9 * kill_timeout)}",
'-e S3_URL="https://clickhouse-datasets.s3.amazonaws.com"',
]
if flaky_check:
envs += ['-e NUM_TRIES=100', '-e MAX_RUN_TIME=1800']
envs += ["-e NUM_TRIES=100", "-e MAX_RUN_TIME=1800"]
envs += [f'-e {e}' for e in additional_envs]
envs += [f"-e {e}" for e in additional_envs]
env_str = ' '.join(envs)
env_str = " ".join(envs)
return f"docker run --volume={builds_path}:/package_folder " \
f"--volume={repo_tests_path}:/usr/share/clickhouse-test " \
f"--volume={result_path}:/test_output --volume={server_log_path}:/var/log/clickhouse-server " \
return (
f"docker run --volume={builds_path}:/package_folder "
f"--volume={repo_tests_path}:/usr/share/clickhouse-test "
f"--volume={result_path}:/test_output --volume={server_log_path}:/var/log/clickhouse-server "
f"--cap-add=SYS_PTRACE {env_str} {additional_options_str} {image}"
)
def get_tests_to_run(pr_info):
@ -85,32 +112,43 @@ def get_tests_to_run(pr_info):
return []
for fpath in pr_info.changed_files:
if 'tests/queries/0_stateless/0' in fpath:
logging.info('File %s changed and seems like stateless test', fpath)
fname = fpath.split('/')[3]
if "tests/queries/0_stateless/0" in fpath:
logging.info("File %s changed and seems like stateless test", fpath)
fname = fpath.split("/")[3]
fname_without_ext = os.path.splitext(fname)[0]
result.add(fname_without_ext + '.')
result.add(fname_without_ext + ".")
return list(result)
def process_results(result_folder, server_log_path):
test_results = []
additional_files = []
# Just upload all files from result_folder.
# If task provides processed results, then it's responsible for content of result_folder.
if os.path.exists(result_folder):
test_files = [f for f in os.listdir(result_folder) if os.path.isfile(os.path.join(result_folder, f))]
test_files = [
f
for f in os.listdir(result_folder)
if os.path.isfile(os.path.join(result_folder, f))
]
additional_files = [os.path.join(result_folder, f) for f in test_files]
if os.path.exists(server_log_path):
server_log_files = [f for f in os.listdir(server_log_path) if os.path.isfile(os.path.join(server_log_path, f))]
additional_files = additional_files + [os.path.join(server_log_path, f) for f in server_log_files]
server_log_files = [
f
for f in os.listdir(server_log_path)
if os.path.isfile(os.path.join(server_log_path, f))
]
additional_files = additional_files + [
os.path.join(server_log_path, f) for f in server_log_files
]
status = []
status_path = os.path.join(result_folder, "check_status.tsv")
if os.path.exists(status_path):
logging.info("Found test_results.tsv")
with open(status_path, 'r', encoding='utf-8') as status_file:
status = list(csv.reader(status_file, delimiter='\t'))
with open(status_path, "r", encoding="utf-8") as status_file:
status = list(csv.reader(status_file, delimiter="\t"))
if len(status) != 1 or len(status[0]) != 2:
logging.info("Files in result folder %s", os.listdir(result_folder))
@ -125,8 +163,8 @@ def process_results(result_folder, server_log_path):
logging.info("Files in result folder %s", os.listdir(result_folder))
return "error", "Not found test_results.tsv", test_results, additional_files
with open(results_path, 'r', encoding='utf-8') as results_file:
test_results = list(csv.reader(results_file, delimiter='\t'))
with open(results_path, "r", encoding="utf-8") as results_file:
test_results = list(csv.reader(results_file, delimiter="\t"))
if len(test_results) == 0:
return "error", "Empty test_results.tsv", test_results, additional_files
@ -137,8 +175,17 @@ def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument("check_name")
parser.add_argument("kill_timeout", type=int)
parser.add_argument("--validate-bugfix", action='store_true', help="Check that added tests failed on latest stable")
parser.add_argument("--post-commit-status", default='commit_status', choices=['commit_status', 'file'], help="Where to public post commit status")
parser.add_argument(
"--validate-bugfix",
action="store_true",
help="Check that added tests failed on latest stable",
)
parser.add_argument(
"--post-commit-status",
default="commit_status",
choices=["commit_status", "file"],
help="Where to public post commit status",
)
return parser.parse_args()
@ -156,7 +203,7 @@ if __name__ == "__main__":
kill_timeout = args.kill_timeout
validate_bugix_check = args.validate_bugfix
flaky_check = 'flaky' in check_name.lower()
flaky_check = "flaky" in check_name.lower()
run_changed_tests = flaky_check or validate_bugix_check
gh = Github(get_best_robot_token())
@ -166,16 +213,23 @@ if __name__ == "__main__":
if not os.path.exists(temp_path):
os.makedirs(temp_path)
if validate_bugix_check and 'pr-bugfix' not in pr_info.labels:
if args.post_commit_status == 'file':
post_commit_status_to_file(os.path.join(temp_path, "post_commit_status.tsv"), 'Skipped (no pr-bugfix)', 'success', 'null')
if validate_bugix_check and "pr-bugfix" not in pr_info.labels:
if args.post_commit_status == "file":
post_commit_status_to_file(
os.path.join(temp_path, "post_commit_status.tsv"),
"Skipped (no pr-bugfix)",
"success",
"null",
)
logging.info("Skipping '%s' (no pr-bugfix)", check_name)
sys.exit(0)
if 'RUN_BY_HASH_NUM' in os.environ:
run_by_hash_num = int(os.getenv('RUN_BY_HASH_NUM'))
run_by_hash_total = int(os.getenv('RUN_BY_HASH_TOTAL'))
check_name_with_group = check_name + f' [{run_by_hash_num + 1}/{run_by_hash_total}]'
if "RUN_BY_HASH_NUM" in os.environ:
run_by_hash_num = int(os.getenv("RUN_BY_HASH_NUM"))
run_by_hash_total = int(os.getenv("RUN_BY_HASH_TOTAL"))
check_name_with_group = (
check_name + f" [{run_by_hash_num + 1}/{run_by_hash_total}]"
)
else:
run_by_hash_num = 0
run_by_hash_total = 0
@ -191,12 +245,18 @@ if __name__ == "__main__":
tests_to_run = get_tests_to_run(pr_info)
if not tests_to_run:
commit = get_commit(gh, pr_info.sha)
state = override_status('success', check_name, validate_bugix_check)
if args.post_commit_status == 'commit_status':
commit.create_status(context=check_name_with_group, description=NO_CHANGES_MSG, state=state)
elif args.post_commit_status == 'file':
state = override_status("success", check_name, validate_bugix_check)
if args.post_commit_status == "commit_status":
commit.create_status(
context=check_name_with_group,
description=NO_CHANGES_MSG,
state=state,
)
elif args.post_commit_status == "file":
fpath = os.path.join(temp_path, "post_commit_status.tsv")
post_commit_status_to_file(fpath, description=NO_CHANGES_MSG, state=state, report_url='null')
post_commit_status_to_file(
fpath, description=NO_CHANGES_MSG, state=state, report_url="null"
)
sys.exit(0)
image_name = get_image_name(check_name)
@ -223,11 +283,23 @@ if __name__ == "__main__":
run_log_path = os.path.join(result_path, "runlog.log")
additional_envs = get_additional_envs(check_name, run_by_hash_num, run_by_hash_total)
additional_envs = get_additional_envs(
check_name, run_by_hash_num, run_by_hash_total
)
if validate_bugix_check:
additional_envs.append('GLOBAL_TAGS=no-random-settings')
additional_envs.append("GLOBAL_TAGS=no-random-settings")
run_command = get_run_command(packages_path, repo_tests_path, result_path, server_log_path, kill_timeout, additional_envs, docker_image, flaky_check, tests_to_run)
run_command = get_run_command(
packages_path,
repo_tests_path,
result_path,
server_log_path,
kill_timeout,
additional_envs,
docker_image,
flaky_check,
tests_to_run,
)
logging.info("Going to run func tests: %s", run_command)
with TeePopen(run_command, run_log_path) as process:
@ -239,29 +311,55 @@ if __name__ == "__main__":
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
s3_helper = S3Helper('https://s3.amazonaws.com')
s3_helper = S3Helper("https://s3.amazonaws.com")
state, description, test_results, additional_logs = process_results(result_path, server_log_path)
state, description, test_results, additional_logs = process_results(
result_path, server_log_path
)
state = override_status(state, check_name, validate_bugix_check)
ch_helper = ClickHouseHelper()
mark_flaky_tests(ch_helper, check_name, test_results)
report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [run_log_path] + additional_logs, check_name_with_group)
report_url = upload_results(
s3_helper,
pr_info.number,
pr_info.sha,
test_results,
[run_log_path] + additional_logs,
check_name_with_group,
)
print(f"::notice:: {check_name} Report url: {report_url}")
if args.post_commit_status == 'commit_status':
post_commit_status(gh, pr_info.sha, check_name_with_group, description, state, report_url)
elif args.post_commit_status == 'file':
post_commit_status_to_file(os.path.join(temp_path, "post_commit_status.tsv"), description, state, report_url)
if args.post_commit_status == "commit_status":
post_commit_status(
gh, pr_info.sha, check_name_with_group, description, state, report_url
)
elif args.post_commit_status == "file":
post_commit_status_to_file(
os.path.join(temp_path, "post_commit_status.tsv"),
description,
state,
report_url,
)
else:
raise Exception(f'Unknown post_commit_status option "{args.post_commit_status}"')
raise Exception(
f'Unknown post_commit_status option "{args.post_commit_status}"'
)
prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name_with_group)
prepared_events = prepare_tests_results_for_clickhouse(
pr_info,
test_results,
state,
stopwatch.duration_seconds,
stopwatch.start_time_str,
report_url,
check_name_with_group,
)
ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
if state != 'success':
if 'force-tests' in pr_info.labels:
if state != "success":
if "force-tests" in pr_info.labels:
print("'force-tests' enabled, will report success")
else:
sys.exit(1)
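The central piece of this file is `get_run_command`, which assembles environment flags and extra options into a single `docker run` string. A trimmed sketch of that assembly with placeholder paths and image tag; nothing is executed, the command is only printed.

# Sketch of the command assembly in get_run_command above; paths and the image
# tag are placeholders, and the string is only printed, never run.
def build_run_command(builds_path, result_path, kill_timeout, additional_envs, image):
    additional_options = ["--hung-check", "--print-time"]
    additional_options_str = (
        '-e ADDITIONAL_OPTIONS="' + " ".join(additional_options) + '"'
    )
    envs = [f"-e MAX_RUN_TIME={int(0.9 * kill_timeout)}"]
    envs += [f"-e {e}" for e in additional_envs]
    env_str = " ".join(envs)
    return (
        f"docker run --volume={builds_path}:/package_folder "
        f"--volume={result_path}:/test_output "
        f"--cap-add=SYS_PTRACE {env_str} {additional_options_str} {image}"
    )


if __name__ == "__main__":
    print(
        build_run_command(
            "/tmp/packages",
            "/tmp/results",
            kill_timeout=3600,
            additional_envs=["USE_DATABASE_REPLICATED=1"],
            image="clickhouse/stateless-test",
        )
    )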

View File

@ -2,13 +2,15 @@
import boto3 # type: ignore
from github import Github # type: ignore
def get_parameter_from_ssm(name, decrypt=True, client=None):
if not client:
client = boto3.client('ssm', region_name='us-east-1')
return client.get_parameter(Name=name, WithDecryption=decrypt)['Parameter']['Value']
client = boto3.client("ssm", region_name="us-east-1")
return client.get_parameter(Name=name, WithDecryption=decrypt)["Parameter"]["Value"]
def get_best_robot_token(token_prefix_env_name="github_robot_token_", total_tokens=4):
client = boto3.client('ssm', region_name='us-east-1')
client = boto3.client("ssm", region_name="us-east-1")
tokens = {}
for i in range(1, total_tokens + 1):
token_name = token_prefix_env_name + str(i)
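This helper reads GitHub robot tokens from AWS SSM Parameter Store. A hedged sketch of the lookup pattern, assuming boto3 and valid AWS credentials; the parameter prefix is taken from the code above, and nothing is fetched until the functions are called.

# Hedged sketch of the SSM lookup above. Requires boto3 plus AWS credentials.
import boto3


def get_parameter_from_ssm(name, decrypt=True, client=None):
    # SecureString parameters come back decrypted when WithDecryption=True.
    if client is None:
        client = boto3.client("ssm", region_name="us-east-1")
    return client.get_parameter(Name=name, WithDecryption=decrypt)["Parameter"]["Value"]


def collect_robot_tokens(prefix="github_robot_token_", total_tokens=4):
    # Fetch token_1 .. token_N; the selection among them is not shown in this hunk.
    client = boto3.client("ssm", region_name="us-east-1")
    return {
        f"{prefix}{i}": get_parameter_from_ssm(f"{prefix}{i}", client=client)
        for i in range(1, total_tokens + 1)
    }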

View File

@ -18,8 +18,16 @@ from build_download_helper import download_all_deb_packages
from download_previous_release import download_previous_release
from upload_result_helper import upload_results
from docker_pull_helper import get_images_with_versions
from commit_status_helper import post_commit_status, override_status, post_commit_status_to_file
from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse
from commit_status_helper import (
post_commit_status,
override_status,
post_commit_status_to_file,
)
from clickhouse_helper import (
ClickHouseHelper,
mark_flaky_tests,
prepare_tests_results_for_clickhouse,
)
from stopwatch import Stopwatch
from rerun_helper import RerunHelper
from tee_popen import TeePopen
@ -41,24 +49,28 @@ IMAGES = [
"clickhouse/dotnet-client",
]
def get_json_params_dict(check_name, pr_info, docker_images, run_by_hash_total, run_by_hash_num):
def get_json_params_dict(
check_name, pr_info, docker_images, run_by_hash_total, run_by_hash_num
):
return {
'context_name': check_name,
'commit': pr_info.sha,
'pull_request': pr_info.number,
'pr_info': {'changed_files' : list(pr_info.changed_files)},
'docker_images_with_versions': docker_images,
'shuffle_test_groups': False,
'use_tmpfs': False,
'disable_net_host': True,
'run_by_hash_total': run_by_hash_total,
'run_by_hash_num': run_by_hash_num,
"context_name": check_name,
"commit": pr_info.sha,
"pull_request": pr_info.number,
"pr_info": {"changed_files": list(pr_info.changed_files)},
"docker_images_with_versions": docker_images,
"shuffle_test_groups": False,
"use_tmpfs": False,
"disable_net_host": True,
"run_by_hash_total": run_by_hash_total,
"run_by_hash_num": run_by_hash_num,
}
def get_env_for_runner(build_path, repo_path, result_path, work_path):
binary_path = os.path.join(build_path, 'clickhouse')
odbc_bridge_path = os.path.join(build_path, 'clickhouse-odbc-bridge')
library_bridge_path = os.path.join(build_path, 'clickhouse-library-bridge')
binary_path = os.path.join(build_path, "clickhouse")
odbc_bridge_path = os.path.join(build_path, "clickhouse-odbc-bridge")
library_bridge_path = os.path.join(build_path, "clickhouse-library-bridge")
my_env = os.environ.copy()
my_env["CLICKHOUSE_TESTS_BUILD_PATH"] = build_path
@ -70,25 +82,30 @@ def get_env_for_runner(build_path, repo_path, result_path, work_path):
my_env["CLICKHOUSE_TESTS_RESULT_PATH"] = result_path
my_env["CLICKHOUSE_TESTS_BASE_CONFIG_DIR"] = f"{repo_path}/programs/server"
my_env["CLICKHOUSE_TESTS_JSON_PARAMS_PATH"] = os.path.join(work_path, "params.json")
my_env["CLICKHOUSE_TESTS_RUNNER_RESTART_DOCKER"] = '0'
my_env["CLICKHOUSE_TESTS_RUNNER_RESTART_DOCKER"] = "0"
return my_env
def process_results(result_folder):
test_results = []
additional_files = []
# Just upload all files from result_folder.
# If task provides processed results, then it's responsible for content of result_folder.
if os.path.exists(result_folder):
test_files = [f for f in os.listdir(result_folder) if os.path.isfile(os.path.join(result_folder, f))]
test_files = [
f
for f in os.listdir(result_folder)
if os.path.isfile(os.path.join(result_folder, f))
]
additional_files = [os.path.join(result_folder, f) for f in test_files]
status = []
status_path = os.path.join(result_folder, "check_status.tsv")
if os.path.exists(status_path):
logging.info("Found test_results.tsv")
with open(status_path, 'r', encoding='utf-8') as status_file:
status = list(csv.reader(status_file, delimiter='\t'))
with open(status_path, "r", encoding="utf-8") as status_file:
status = list(csv.reader(status_file, delimiter="\t"))
if len(status) != 1 or len(status[0]) != 2:
logging.info("Files in result folder %s", os.listdir(result_folder))
@ -97,8 +114,8 @@ def process_results(result_folder):
results_path = os.path.join(result_folder, "test_results.tsv")
if os.path.exists(results_path):
with open(results_path, 'r', encoding='utf-8') as results_file:
test_results = list(csv.reader(results_file, delimiter='\t'))
with open(results_path, "r", encoding="utf-8") as results_file:
test_results = list(csv.reader(results_file, delimiter="\t"))
if len(test_results) == 0:
return "error", "Empty test_results.tsv", test_results, additional_files
@ -108,8 +125,17 @@ def process_results(result_folder):
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument("check_name")
parser.add_argument("--validate-bugfix", action='store_true', help="Check that added tests failed on latest stable")
parser.add_argument("--post-commit-status", default='commit_status', choices=['commit_status', 'file'], help="Where to public post commit status")
parser.add_argument(
"--validate-bugfix",
action="store_true",
help="Check that added tests failed on latest stable",
)
parser.add_argument(
"--post-commit-status",
default="commit_status",
choices=["commit_status", "file"],
help="Where to public post commit status",
)
return parser.parse_args()
@ -126,10 +152,12 @@ if __name__ == "__main__":
check_name = args.check_name
validate_bugix_check = args.validate_bugfix
if 'RUN_BY_HASH_NUM' in os.environ:
run_by_hash_num = int(os.getenv('RUN_BY_HASH_NUM'))
run_by_hash_total = int(os.getenv('RUN_BY_HASH_TOTAL'))
check_name_with_group = check_name + f' [{run_by_hash_num + 1}/{run_by_hash_total}]'
if "RUN_BY_HASH_NUM" in os.environ:
run_by_hash_num = int(os.getenv("RUN_BY_HASH_NUM"))
run_by_hash_total = int(os.getenv("RUN_BY_HASH_TOTAL"))
check_name_with_group = (
check_name + f" [{run_by_hash_num + 1}/{run_by_hash_total}]"
)
else:
run_by_hash_num = 0
run_by_hash_total = 0
@ -138,12 +166,17 @@ if __name__ == "__main__":
if not os.path.exists(temp_path):
os.makedirs(temp_path)
is_flaky_check = 'flaky' in check_name
is_flaky_check = "flaky" in check_name
pr_info = PRInfo(need_changed_files=is_flaky_check or validate_bugix_check)
if validate_bugix_check and 'pr-bugfix' not in pr_info.labels:
if args.post_commit_status == 'file':
post_commit_status_to_file(os.path.join(temp_path, "post_commit_status.tsv"), 'Skipped (no pr-bugfix)', 'success', 'null')
if validate_bugix_check and "pr-bugfix" not in pr_info.labels:
if args.post_commit_status == "file":
post_commit_status_to_file(
os.path.join(temp_path, "post_commit_status.tsv"),
"Skipped (no pr-bugfix)",
"success",
"null",
)
logging.info("Skipping '%s' (no pr-bugfix)", check_name)
sys.exit(0)
@ -175,9 +208,19 @@ if __name__ == "__main__":
my_env = get_env_for_runner(build_path, repo_path, result_path, work_path)
json_path = os.path.join(work_path, 'params.json')
with open(json_path, 'w', encoding='utf-8') as json_params:
json_params.write(json.dumps(get_json_params_dict(check_name, pr_info, images_with_versions, run_by_hash_total, run_by_hash_num)))
json_path = os.path.join(work_path, "params.json")
with open(json_path, "w", encoding="utf-8") as json_params:
json_params.write(
json.dumps(
get_json_params_dict(
check_name,
pr_info,
images_with_versions,
run_by_hash_total,
run_by_hash_num,
)
)
)
output_path_log = os.path.join(result_path, "main_script_log.txt")
@ -199,16 +242,41 @@ if __name__ == "__main__":
ch_helper = ClickHouseHelper()
mark_flaky_tests(ch_helper, check_name, test_results)
s3_helper = S3Helper('https://s3.amazonaws.com')
report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [output_path_log] + additional_logs, check_name_with_group, False)
s3_helper = S3Helper("https://s3.amazonaws.com")
report_url = upload_results(
s3_helper,
pr_info.number,
pr_info.sha,
test_results,
[output_path_log] + additional_logs,
check_name_with_group,
False,
)
print(f"::notice:: {check_name} Report url: {report_url}")
if args.post_commit_status == 'commit_status':
post_commit_status(gh, pr_info.sha, check_name_with_group, description, state, report_url)
elif args.post_commit_status == 'file':
post_commit_status_to_file(os.path.join(temp_path, "post_commit_status.tsv"), description, state, report_url)
if args.post_commit_status == "commit_status":
post_commit_status(
gh, pr_info.sha, check_name_with_group, description, state, report_url
)
elif args.post_commit_status == "file":
post_commit_status_to_file(
os.path.join(temp_path, "post_commit_status.tsv"),
description,
state,
report_url,
)
else:
raise Exception(f'Unknown post_commit_status option "{args.post_commit_status}"')
raise Exception(
f'Unknown post_commit_status option "{args.post_commit_status}"'
)
prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name_with_group)
prepared_events = prepare_tests_results_for_clickhouse(
pr_info,
test_results,
state,
stopwatch.duration_seconds,
stopwatch.start_time_str,
report_url,
check_name_with_group,
)
ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
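The runner receives its configuration through a `params.json` file built from `get_json_params_dict`. A self-contained sketch of that serialization step; every value below is invented for illustration.

# Sketch of writing the runner parameters shown in get_json_params_dict above;
# all values are invented.
import json
import os
import tempfile


def make_params(check_name, sha, pr_number, changed_files, docker_images):
    return {
        "context_name": check_name,
        "commit": sha,
        "pull_request": pr_number,
        "pr_info": {"changed_files": list(changed_files)},
        "docker_images_with_versions": docker_images,
        "shuffle_test_groups": False,
        "use_tmpfs": False,
        "disable_net_host": True,
        "run_by_hash_total": 0,
        "run_by_hash_num": 0,
    }


if __name__ == "__main__":
    work_path = tempfile.mkdtemp()
    json_path = os.path.join(work_path, "params.json")
    params = make_params(
        "Integration tests (example)",
        "deadbeef",
        12345,
        ["tests/integration/test_example/test.py"],
        {"clickhouse/integration-tests-runner": "latest"},
    )
    with open(json_path, "w", encoding="utf-8") as json_params:
        json_params.write(json.dumps(params))
    print("wrote", json_path)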

View File

@ -24,10 +24,10 @@ from ssh import SSHKey
from build_download_helper import get_build_name_for_check
from rerun_helper import RerunHelper
JEPSEN_GROUP_NAME = 'jepsen_group'
JEPSEN_GROUP_NAME = "jepsen_group"
DESIRED_INSTANCE_COUNT = 3
IMAGE_NAME = 'clickhouse/keeper-jepsen-test'
CHECK_NAME = 'ClickHouse Keeper Jepsen (actions)'
IMAGE_NAME = "clickhouse/keeper-jepsen-test"
CHECK_NAME = "ClickHouse Keeper Jepsen (actions)"
SUCCESSFUL_TESTS_ANCHOR = "# Successful tests"
@ -35,45 +35,58 @@ INTERMINATE_TESTS_ANCHOR = "# Indeterminate tests"
CRASHED_TESTS_ANCHOR = "# Crashed tests"
FAILED_TESTS_ANCHOR = "# Failed tests"
def _parse_jepsen_output(path):
test_results = []
current_type = ''
with open(path, 'r') as f:
current_type = ""
with open(path, "r") as f:
for line in f:
if SUCCESSFUL_TESTS_ANCHOR in line:
current_type = 'OK'
current_type = "OK"
elif INTERMINATE_TESTS_ANCHOR in line or CRASHED_TESTS_ANCHOR in line:
current_type = 'ERROR'
current_type = "ERROR"
elif FAILED_TESTS_ANCHOR in line:
current_type = 'FAIL'
current_type = "FAIL"
if (line.startswith('store/clickhouse-keeper') or line.startswith('clickhouse-keeper')) and current_type:
if (
line.startswith("store/clickhouse-keeper")
or line.startswith("clickhouse-keeper")
) and current_type:
test_results.append((line.strip(), current_type))
return test_results
def get_autoscaling_group_instances_ids(asg_client, group_name):
group_description = asg_client.describe_auto_scaling_groups(AutoScalingGroupNames=[group_name])
our_group = group_description['AutoScalingGroups'][0]
group_description = asg_client.describe_auto_scaling_groups(
AutoScalingGroupNames=[group_name]
)
our_group = group_description["AutoScalingGroups"][0]
instance_ids = []
for instance in our_group['Instances']:
if instance['LifecycleState'] == 'InService' and instance['HealthStatus'] == 'Healthy':
instance_ids.append(instance['InstanceId'])
for instance in our_group["Instances"]:
if (
instance["LifecycleState"] == "InService"
and instance["HealthStatus"] == "Healthy"
):
instance_ids.append(instance["InstanceId"])
return instance_ids
def get_instances_addresses(ec2_client, instance_ids):
ec2_response = ec2_client.describe_instances(InstanceIds = instance_ids)
ec2_response = ec2_client.describe_instances(InstanceIds=instance_ids)
instance_ips = []
for instances in ec2_response['Reservations']:
for ip in instances['Instances']:
instance_ips.append(ip['PrivateIpAddress'])
for instances in ec2_response["Reservations"]:
for ip in instances["Instances"]:
instance_ips.append(ip["PrivateIpAddress"])
return instance_ips
def prepare_autoscaling_group_and_get_hostnames():
asg_client = boto3.client('autoscaling', region_name='us-east-1')
asg_client.set_desired_capacity(AutoScalingGroupName=JEPSEN_GROUP_NAME, DesiredCapacity=DESIRED_INSTANCE_COUNT)
asg_client = boto3.client("autoscaling", region_name="us-east-1")
asg_client.set_desired_capacity(
AutoScalingGroupName=JEPSEN_GROUP_NAME, DesiredCapacity=DESIRED_INSTANCE_COUNT
)
instances = get_autoscaling_group_instances_ids(asg_client, JEPSEN_GROUP_NAME)
counter = 0
@ -84,13 +97,15 @@ def prepare_autoscaling_group_and_get_hostnames():
if counter > 30:
raise Exception("Cannot wait autoscaling group")
ec2_client = boto3.client('ec2', region_name='us-east-1')
ec2_client = boto3.client("ec2", region_name="us-east-1")
return get_instances_addresses(ec2_client, instances)
def clear_autoscaling_group():
asg_client = boto3.client('autoscaling', region_name='us-east-1')
asg_client.set_desired_capacity(AutoScalingGroupName=JEPSEN_GROUP_NAME, DesiredCapacity=0)
asg_client = boto3.client("autoscaling", region_name="us-east-1")
asg_client.set_desired_capacity(
AutoScalingGroupName=JEPSEN_GROUP_NAME, DesiredCapacity=0
)
instances = get_autoscaling_group_instances_ids(asg_client, JEPSEN_GROUP_NAME)
counter = 0
while len(instances) > 0:
@ -103,15 +118,28 @@ def clear_autoscaling_group():
def save_nodes_to_file(instances, temp_path):
nodes_path = os.path.join(temp_path, "nodes.txt")
with open(nodes_path, 'w') as f:
with open(nodes_path, "w") as f:
f.write("\n".join(instances))
f.flush()
return nodes_path
def get_run_command(ssh_auth_sock, ssh_sock_dir, pr_info, nodes_path, repo_path, build_url, result_path, docker_image):
return f"docker run --network=host -v '{ssh_sock_dir}:{ssh_sock_dir}' -e SSH_AUTH_SOCK={ssh_auth_sock} " \
f"-e PR_TO_TEST={pr_info.number} -e SHA_TO_TEST={pr_info.sha} -v '{nodes_path}:/nodes.txt' -v {result_path}:/test_output " \
f"-e 'CLICKHOUSE_PACKAGE={build_url}' -v '{repo_path}:/ch' -e 'CLICKHOUSE_REPO_PATH=/ch' -e NODES_USERNAME=ubuntu {docker_image}"
def get_run_command(
ssh_auth_sock,
ssh_sock_dir,
pr_info,
nodes_path,
repo_path,
build_url,
result_path,
docker_image,
):
return (
f"docker run --network=host -v '{ssh_sock_dir}:{ssh_sock_dir}' -e SSH_AUTH_SOCK={ssh_auth_sock} "
f"-e PR_TO_TEST={pr_info.number} -e SHA_TO_TEST={pr_info.sha} -v '{nodes_path}:/nodes.txt' -v {result_path}:/test_output "
f"-e 'CLICKHOUSE_PACKAGE={build_url}' -v '{repo_path}:/ch' -e 'CLICKHOUSE_REPO_PATH=/ch' -e NODES_USERNAME=ubuntu {docker_image}"
)
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
@ -120,9 +148,14 @@ if __name__ == "__main__":
pr_info = PRInfo()
logging.info("Start at PR number %s, commit sha %s labels %s", pr_info.number, pr_info.sha, pr_info.labels)
logging.info(
"Start at PR number %s, commit sha %s labels %s",
pr_info.number,
pr_info.sha,
pr_info.labels,
)
if pr_info.number != 0 and 'jepsen-test' not in pr_info.labels:
if pr_info.number != 0 and "jepsen-test" not in pr_info.labels:
logging.info("Not jepsen test label in labels list, skipping")
sys.exit(0)
@ -167,13 +200,24 @@ if __name__ == "__main__":
head = requests.head(build_url)
counter += 1
if counter >= 180:
post_commit_status(gh, pr_info.sha, CHECK_NAME, "Cannot fetch build to run", "error", "")
post_commit_status(
gh, pr_info.sha, CHECK_NAME, "Cannot fetch build to run", "error", ""
)
raise Exception("Cannot fetch build")
with SSHKey(key_value=get_parameter_from_ssm("jepsen_ssh_key") + '\n'):
ssh_auth_sock = os.environ['SSH_AUTH_SOCK']
with SSHKey(key_value=get_parameter_from_ssm("jepsen_ssh_key") + "\n"):
ssh_auth_sock = os.environ["SSH_AUTH_SOCK"]
auth_sock_dir = os.path.dirname(ssh_auth_sock)
cmd = get_run_command(ssh_auth_sock, auth_sock_dir, pr_info, nodes_path, REPO_COPY, build_url, result_path, docker_image)
cmd = get_run_command(
ssh_auth_sock,
auth_sock_dir,
pr_info,
nodes_path,
REPO_COPY,
build_url,
result_path,
docker_image,
)
logging.info("Going to run jepsen: %s", cmd)
run_log_path = os.path.join(TEMP_PATH, "runlog.log")
@ -185,31 +229,49 @@ if __name__ == "__main__":
else:
logging.info("Run failed")
status = 'success'
description = 'No invalid analysis found ヽ(‘ー`)'
jepsen_log_path = os.path.join(result_path, 'jepsen_run_all_tests.log')
status = "success"
description = "No invalid analysis found ヽ(‘ー`)"
jepsen_log_path = os.path.join(result_path, "jepsen_run_all_tests.log")
additional_data = []
try:
test_result = _parse_jepsen_output(jepsen_log_path)
if any(r[1] == 'FAIL' for r in test_result):
status = 'failure'
description = 'Found invalid analysis (ノಥ益ಥ)ノ ┻━┻'
if any(r[1] == "FAIL" for r in test_result):
status = "failure"
description = "Found invalid analysis (ノಥ益ಥ)ノ ┻━┻"
compress_fast(os.path.join(result_path, 'store'), os.path.join(result_path, 'jepsen_store.tar.gz'))
additional_data.append(os.path.join(result_path, 'jepsen_store.tar.gz'))
compress_fast(
os.path.join(result_path, "store"),
os.path.join(result_path, "jepsen_store.tar.gz"),
)
additional_data.append(os.path.join(result_path, "jepsen_store.tar.gz"))
except Exception as ex:
print("Exception", ex)
status = 'failure'
description = 'No Jepsen output log'
test_result = [('No Jepsen output log', 'FAIL')]
status = "failure"
description = "No Jepsen output log"
test_result = [("No Jepsen output log", "FAIL")]
s3_helper = S3Helper('https://s3.amazonaws.com')
report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_result, [run_log_path] + additional_data, CHECK_NAME)
s3_helper = S3Helper("https://s3.amazonaws.com")
report_url = upload_results(
s3_helper,
pr_info.number,
pr_info.sha,
test_result,
[run_log_path] + additional_data,
CHECK_NAME,
)
print(f"::notice ::Report url: {report_url}")
post_commit_status(gh, pr_info.sha, CHECK_NAME, description, status, report_url)
ch_helper = ClickHouseHelper()
prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_result, status, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, CHECK_NAME)
prepared_events = prepare_tests_results_for_clickhouse(
pr_info,
test_result,
status,
stopwatch.duration_seconds,
stopwatch.start_time_str,
report_url,
CHECK_NAME,
)
ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
clear_autoscaling_group()
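`_parse_jepsen_output` scans the log for anchor headings and classifies each keeper test line by the most recent anchor. A self-contained sketch of that classification, run on an invented in-memory sample instead of the real log file.

# Self-contained sketch of the anchor-based classification in _parse_jepsen_output;
# the sample log text is invented.
SUCCESSFUL = "# Successful tests"
INDETERMINATE = "# Indeterminate tests"
CRASHED = "# Crashed tests"
FAILED = "# Failed tests"

SAMPLE_LOG = """\
# Successful tests
clickhouse-keeper set 1
# Failed tests
store/clickhouse-keeper linearizability 3
"""


def parse_jepsen_lines(lines):
    test_results = []
    current_type = ""
    for line in lines:
        if SUCCESSFUL in line:
            current_type = "OK"
        elif INDETERMINATE in line or CRASHED in line:
            current_type = "ERROR"
        elif FAILED in line:
            current_type = "FAIL"
        if (
            line.startswith("store/clickhouse-keeper")
            or line.startswith("clickhouse-keeper")
        ) and current_type:
            test_results.append((line.strip(), current_type))
    return test_results


if __name__ == "__main__":
    print(parse_jepsen_lines(SAMPLE_LOG.splitlines()))
    # -> [('clickhouse-keeper set 1', 'OK'),
    #     ('store/clickhouse-keeper linearizability 3', 'FAIL')]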

View File

@ -19,14 +19,26 @@ from commit_status_helper import get_commit, post_commit_status
from tee_popen import TeePopen
from rerun_helper import RerunHelper
IMAGE_NAME = 'clickhouse/performance-comparison'
IMAGE_NAME = "clickhouse/performance-comparison"
def get_run_command(workspace, result_path, repo_tests_path, pr_to_test, sha_to_test, additional_env, image):
return f"docker run --privileged --volume={workspace}:/workspace --volume={result_path}:/output " \
f"--volume={repo_tests_path}:/usr/share/clickhouse-test " \
f"--cap-add syslog --cap-add sys_admin --cap-add sys_rawio " \
f"-e PR_TO_TEST={pr_to_test} -e SHA_TO_TEST={sha_to_test} {additional_env} " \
def get_run_command(
workspace,
result_path,
repo_tests_path,
pr_to_test,
sha_to_test,
additional_env,
image,
):
return (
f"docker run --privileged --volume={workspace}:/workspace --volume={result_path}:/output "
f"--volume={repo_tests_path}:/usr/share/clickhouse-test "
f"--cap-add syslog --cap-add sys_admin --cap-add sys_rawio "
f"-e PR_TO_TEST={pr_to_test} -e SHA_TO_TEST={sha_to_test} {additional_env} "
f"{image}"
)
class RamDrive:
def __init__(self, path, size):
@ -37,11 +49,14 @@ class RamDrive:
if not os.path.exists(self.path):
os.makedirs(self.path)
subprocess.check_call(f"sudo mount -t tmpfs -o rw,size={self.size} tmpfs {self.path}", shell=True)
subprocess.check_call(
f"sudo mount -t tmpfs -o rw,size={self.size} tmpfs {self.path}", shell=True
)
def __exit__(self, exc_type, exc_val, exc_tb):
subprocess.check_call(f"sudo umount {self.path}", shell=True)
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
temp_path = os.getenv("TEMP_PATH", os.path.abspath("."))
@ -49,7 +64,7 @@ if __name__ == "__main__":
repo_tests_path = os.path.join(repo_path, "tests")
ramdrive_path = os.getenv("RAMDRIVE_PATH", os.path.join(temp_path, "ramdrive"))
# currently unused, doesn't make tests more stable
ramdrive_size = os.getenv("RAMDRIVE_SIZE", '0G')
ramdrive_size = os.getenv("RAMDRIVE_SIZE", "0G")
reports_path = os.getenv("REPORTS_PATH", "./reports")
check_name = sys.argv[1]
@ -57,14 +72,14 @@ if __name__ == "__main__":
if not os.path.exists(temp_path):
os.makedirs(temp_path)
with open(os.getenv('GITHUB_EVENT_PATH'), 'r', encoding='utf-8') as event_file:
with open(os.getenv("GITHUB_EVENT_PATH"), "r", encoding="utf-8") as event_file:
event = json.load(event_file)
gh = Github(get_best_robot_token())
pr_info = PRInfo(event)
commit = get_commit(gh, pr_info.sha)
docker_env = ''
docker_env = ""
docker_env += " -e S3_URL=https://s3.amazonaws.com/clickhouse-builds"
@ -75,13 +90,16 @@ if __name__ == "__main__":
task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}"
docker_env += ' -e CHPC_ADD_REPORT_LINKS="<a href={}>Job (actions)</a> <a href={}>Tested commit</a>"'.format(
task_url, pr_link)
task_url, pr_link
)
if 'RUN_BY_HASH_TOTAL' in os.environ:
run_by_hash_total = int(os.getenv('RUN_BY_HASH_TOTAL'))
run_by_hash_num = int(os.getenv('RUN_BY_HASH_NUM'))
docker_env += f' -e CHPC_TEST_RUN_BY_HASH_TOTAL={run_by_hash_total} -e CHPC_TEST_RUN_BY_HASH_NUM={run_by_hash_num}'
check_name_with_group = check_name + f' [{run_by_hash_num + 1}/{run_by_hash_total}]'
if "RUN_BY_HASH_TOTAL" in os.environ:
run_by_hash_total = int(os.getenv("RUN_BY_HASH_TOTAL"))
run_by_hash_num = int(os.getenv("RUN_BY_HASH_NUM"))
docker_env += f" -e CHPC_TEST_RUN_BY_HASH_TOTAL={run_by_hash_total} -e CHPC_TEST_RUN_BY_HASH_NUM={run_by_hash_num}"
check_name_with_group = (
check_name + f" [{run_by_hash_num + 1}/{run_by_hash_total}]"
)
else:
check_name_with_group = check_name
@ -92,12 +110,20 @@ if __name__ == "__main__":
docker_image = get_image_with_version(reports_path, IMAGE_NAME)
#with RamDrive(ramdrive_path, ramdrive_size):
# with RamDrive(ramdrive_path, ramdrive_size):
result_path = ramdrive_path
if not os.path.exists(result_path):
os.makedirs(result_path)
run_command = get_run_command(result_path, result_path, repo_tests_path, pr_info.number, pr_info.sha, docker_env, docker_image)
run_command = get_run_command(
result_path,
result_path,
repo_tests_path,
pr_info.number,
pr_info.sha,
docker_env,
docker_image,
)
logging.info("Going to run command %s", run_command)
run_log_path = os.path.join(temp_path, "runlog.log")
with TeePopen(run_command, run_log_path) as process:
@ -110,74 +136,83 @@ if __name__ == "__main__":
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
paths = {
'compare.log': os.path.join(result_path, 'compare.log'),
'output.7z': os.path.join(result_path, 'output.7z'),
'report.html': os.path.join(result_path, 'report.html'),
'all-queries.html': os.path.join(result_path, 'all-queries.html'),
'queries.rep': os.path.join(result_path, 'queries.rep'),
'all-query-metrics.tsv': os.path.join(result_path, 'report/all-query-metrics.tsv'),
'runlog.log': run_log_path,
"compare.log": os.path.join(result_path, "compare.log"),
"output.7z": os.path.join(result_path, "output.7z"),
"report.html": os.path.join(result_path, "report.html"),
"all-queries.html": os.path.join(result_path, "all-queries.html"),
"queries.rep": os.path.join(result_path, "queries.rep"),
"all-query-metrics.tsv": os.path.join(
result_path, "report/all-query-metrics.tsv"
),
"runlog.log": run_log_path,
}
check_name_prefix = check_name_with_group.lower().replace(' ', '_').replace('(', '_').replace(')', '_').replace(',', '_')
s3_prefix = f'{pr_info.number}/{pr_info.sha}/{check_name_prefix}/'
s3_helper = S3Helper('https://s3.amazonaws.com')
check_name_prefix = (
check_name_with_group.lower()
.replace(" ", "_")
.replace("(", "_")
.replace(")", "_")
.replace(",", "_")
)
s3_prefix = f"{pr_info.number}/{pr_info.sha}/{check_name_prefix}/"
s3_helper = S3Helper("https://s3.amazonaws.com")
for file in paths:
try:
paths[file] = s3_helper.upload_test_report_to_s3(paths[file],
s3_prefix + file)
paths[file] = s3_helper.upload_test_report_to_s3(
paths[file], s3_prefix + file
)
except Exception:
paths[file] = ''
paths[file] = ""
traceback.print_exc()
# Upload all images and flamegraphs to S3
try:
s3_helper.upload_test_folder_to_s3(
os.path.join(result_path, 'images'),
s3_prefix + 'images'
os.path.join(result_path, "images"), s3_prefix + "images"
)
except Exception:
traceback.print_exc()
# Try to fetch status from the report.
status = ''
message = ''
status = ""
message = ""
try:
report_text = open(os.path.join(result_path, 'report.html'), 'r').read()
status_match = re.search('<!--[ ]*status:(.*)-->', report_text)
message_match = re.search('<!--[ ]*message:(.*)-->', report_text)
report_text = open(os.path.join(result_path, "report.html"), "r").read()
status_match = re.search("<!--[ ]*status:(.*)-->", report_text)
message_match = re.search("<!--[ ]*message:(.*)-->", report_text)
if status_match:
status = status_match.group(1).strip()
if message_match:
message = message_match.group(1).strip()
# TODO: Remove me, always green mode for the first time
status = 'success'
status = "success"
except Exception:
traceback.print_exc()
status = 'failure'
message = 'Failed to parse the report.'
status = "failure"
message = "Failed to parse the report."
if not status:
status = 'failure'
message = 'No status in report.'
status = "failure"
message = "No status in report."
elif not message:
status = 'failure'
message = 'No message in report.'
status = "failure"
message = "No message in report."
report_url = task_url
if paths['runlog.log']:
report_url = paths['runlog.log']
if paths["runlog.log"]:
report_url = paths["runlog.log"]
if paths['compare.log']:
report_url = paths['compare.log']
if paths["compare.log"]:
report_url = paths["compare.log"]
if paths['output.7z']:
report_url = paths['output.7z']
if paths["output.7z"]:
report_url = paths["output.7z"]
if paths['report.html']:
report_url = paths['report.html']
if paths["report.html"]:
report_url = paths["report.html"]
post_commit_status(gh, pr_info.sha, check_name_with_group, message, status, report_url)
post_commit_status(
gh, pr_info.sha, check_name_with_group, message, status, report_url
)
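The check status and message are pulled out of HTML comments embedded in `report.html`. A sketch of that extraction over a small invented report string, using the same regular expressions as above.

# Sketch of the status/message extraction, run on an invented report snippet.
import re

SAMPLE_REPORT = """\
<html><body>
<!-- status: success -->
<!-- message: No changes in performance -->
</body></html>
"""

if __name__ == "__main__":
    status_match = re.search("<!--[ ]*status:(.*)-->", SAMPLE_REPORT)
    message_match = re.search("<!--[ ]*message:(.*)-->", SAMPLE_REPORT)
    status = status_match.group(1).strip() if status_match else "failure"
    message = message_match.group(1).strip() if message_match else "No status in report."
    print(status, "-", message)  # -> success - No changes in performance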

View File

@ -92,27 +92,27 @@ HTML_TEST_PART = """
</table>
"""
BASE_HEADERS = ['Test name', 'Test status']
BASE_HEADERS = ["Test name", "Test status"]
class ReportColorTheme:
class ReportColor:
yellow = '#FFB400'
red = '#F00'
green = '#0A0'
blue = '#00B4FF'
yellow = "#FFB400"
red = "#F00"
green = "#0A0"
blue = "#00B4FF"
default = (ReportColor.green, ReportColor.red, ReportColor.yellow)
bugfixcheck = (ReportColor.yellow, ReportColor.blue, ReportColor.blue)
def _format_header(header, branch_name, branch_url=None):
result = ' '.join([w.capitalize() for w in header.split(' ')])
result = " ".join([w.capitalize() for w in header.split(" ")])
result = result.replace("Clickhouse", "ClickHouse")
result = result.replace("clickhouse", "ClickHouse")
if 'ClickHouse' not in result:
result = 'ClickHouse ' + result
result += ' for '
if "ClickHouse" not in result:
result = "ClickHouse " + result
result += " for "
if branch_url:
result += '<a href="{url}">{name}</a>'.format(url=branch_url, name=branch_name)
else:
@ -121,27 +121,27 @@ def _format_header(header, branch_name, branch_url=None):
def _get_status_style(status, colortheme=None):
ok_statuses = ('OK', 'success', 'PASSED')
fail_statuses = ('FAIL', 'failure', 'error', 'FAILED', 'Timeout')
ok_statuses = ("OK", "success", "PASSED")
fail_statuses = ("FAIL", "failure", "error", "FAILED", "Timeout")
if colortheme is None:
colortheme = ReportColorTheme.default
style = "font-weight: bold;"
if status in ok_statuses:
style += f'color: {colortheme[0]};'
style += f"color: {colortheme[0]};"
elif status in fail_statuses:
style += f'color: {colortheme[1]};'
style += f"color: {colortheme[1]};"
else:
style += f'color: {colortheme[2]};'
style += f"color: {colortheme[2]};"
return style
def _get_html_url_name(url):
if isinstance(url, str):
return os.path.basename(url).replace('%2B', '+').replace('%20', ' ')
return os.path.basename(url).replace("%2B", "+").replace("%20", " ")
if isinstance(url, tuple):
return url[1].replace('%2B', '+').replace('%20', ' ')
return url[1].replace("%2B", "+").replace("%20", " ")
return None
@ -153,11 +153,24 @@ def _get_html_url(url):
if isinstance(url, tuple):
href, name = url[0], _get_html_url_name(url)
if href and name:
return '<a href="{href}">{name}</a>'.format(href=href, name=_get_html_url_name(url))
return ''
return '<a href="{href}">{name}</a>'.format(
href=href, name=_get_html_url_name(url)
)
return ""
def create_test_html_report(header, test_result, raw_log_url, task_url, branch_url, branch_name, commit_url, additional_urls=None, with_raw_logs=False, statuscolors=None):
def create_test_html_report(
header,
test_result,
raw_log_url,
task_url,
branch_url,
branch_name,
commit_url,
additional_urls=None,
with_raw_logs=False,
statuscolors=None,
):
if additional_urls is None:
additional_urls = []
@ -181,9 +194,9 @@ def create_test_html_report(header, test_result, raw_log_url, task_url, branch_u
has_test_logs = True
row = "<tr>"
is_fail = test_status in ('FAIL', 'FLAKY')
is_fail = test_status in ("FAIL", "FLAKY")
if is_fail and with_raw_logs and test_logs is not None:
row = "<tr class=\"failed\">"
row = '<tr class="failed">'
row += "<td>" + test_name + "</td>"
style = _get_status_style(test_status, colortheme=statuscolors)
@ -193,7 +206,13 @@ def create_test_html_report(header, test_result, raw_log_url, task_url, branch_u
num_fails = num_fails + 1
is_fail_id = 'id="fail' + str(num_fails) + '" '
row += '<td ' + is_fail_id + 'style="{}">'.format(style) + test_status + "</td>"
row += (
"<td "
+ is_fail_id
+ 'style="{}">'.format(style)
+ test_status
+ "</td>"
)
if test_time is not None:
row += "<td>" + test_time + "</td>"
@ -205,24 +224,26 @@ def create_test_html_report(header, test_result, raw_log_url, task_url, branch_u
row += "</tr>"
rows_part += row
if test_logs is not None and with_raw_logs:
row = "<tr class=\"failed-content\">"
row = '<tr class="failed-content">'
# TODO: compute colspan too
row += "<td colspan=\"3\"><pre>" + test_logs + "</pre></td>"
row += '<td colspan="3"><pre>' + test_logs + "</pre></td>"
row += "</tr>"
rows_part += row
headers = BASE_HEADERS
if has_test_time:
headers.append('Test time, sec.')
headers.append("Test time, sec.")
if has_test_logs and not with_raw_logs:
headers.append('Logs')
headers.append("Logs")
headers = ''.join(['<th>' + h + '</th>' for h in headers])
headers = "".join(["<th>" + h + "</th>" for h in headers])
test_part = HTML_TEST_PART.format(headers=headers, rows=rows_part)
else:
test_part = ""
additional_html_urls = ' '.join([_get_html_url(url) for url in sorted(additional_urls, key=_get_html_url_name)])
additional_html_urls = " ".join(
[_get_html_url(url) for url in sorted(additional_urls, key=_get_html_url_name)]
)
result = HTML_BASE_TEST_TEMPLATE.format(
title=_format_header(header, branch_name),
@ -233,7 +254,7 @@ def create_test_html_report(header, test_result, raw_log_url, task_url, branch_u
test_part=test_part,
branch_name=branch_name,
commit_url=commit_url,
additional_urls=additional_html_urls
additional_urls=additional_html_urls,
)
return result
@ -297,9 +318,20 @@ tr:hover td {{filter: brightness(95%);}}
LINK_TEMPLATE = '<a href="{url}">{text}</a>'
def create_build_html_report(header, build_results, build_logs_urls, artifact_urls_list, task_url, branch_url, branch_name, commit_url):
def create_build_html_report(
header,
build_results,
build_logs_urls,
artifact_urls_list,
task_url,
branch_url,
branch_name,
commit_url,
):
rows = ""
for (build_result, build_log_url, artifact_urls) in zip(build_results, build_logs_urls, artifact_urls_list):
for (build_result, build_log_url, artifact_urls) in zip(
build_results, build_logs_urls, artifact_urls_list
):
row = "<tr>"
row += "<td>{}</td>".format(build_result.compiler)
if build_result.build_type:
@ -326,18 +358,20 @@ def create_build_html_report(header, build_results, build_logs_urls, artifact_ur
if build_result.elapsed_seconds:
delta = datetime.timedelta(seconds=build_result.elapsed_seconds)
else:
delta = 'unknown'
delta = "unknown"
row += '<td>{}</td>'.format(str(delta))
row += "<td>{}</td>".format(str(delta))
links = ""
link_separator = "<br/>"
if artifact_urls:
for artifact_url in artifact_urls:
links += LINK_TEMPLATE.format(text=_get_html_url_name(artifact_url), url=artifact_url)
links += LINK_TEMPLATE.format(
text=_get_html_url_name(artifact_url), url=artifact_url
)
links += link_separator
if links:
links = links[:-len(link_separator)]
links = links[: -len(link_separator)]
row += "<td>{}</td>".format(links)
row += "</tr>"
@ -348,4 +382,5 @@ def create_build_html_report(header, build_results, build_logs_urls, artifact_ur
rows=rows,
task_url=task_url,
branch_name=branch_name,
commit_url=commit_url)
commit_url=commit_url,
)
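`create_test_html_report` builds its table by concatenating per-test `<tr>` rows and `<th>` header cells. A trimmed sketch of that row assembly with invented results; the status-to-style mapping is deliberately simplified.

# Trimmed sketch of the row assembly in create_test_html_report; results and the
# minimal style mapping are invented for illustration.
BASE_HEADERS = ["Test name", "Test status"]


def status_style(status):
    ok = ("OK", "success", "PASSED")
    return "color: #0A0;" if status in ok else "color: #F00;"


def build_test_table(test_results):
    rows = ""
    for test_name, test_status, test_time in test_results:
        row = "<tr>"
        row += "<td>" + test_name + "</td>"
        row += '<td style="{}">'.format(status_style(test_status)) + test_status + "</td>"
        row += "<td>" + test_time + "</td>"
        row += "</tr>"
        rows += row
    headers = "".join(
        ["<th>" + h + "</th>" for h in BASE_HEADERS + ["Test time, sec."]]
    )
    return "<table><tr>{}</tr>{}</table>".format(headers, rows)


if __name__ == "__main__":
    print(build_test_table([("00001_select_1", "OK", "0.12"), ("00002_join", "FAIL", "3.40")]))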

View File

@ -2,6 +2,7 @@
from commit_status_helper import get_commit
def _filter_statuses(statuses):
"""
Squash statuses to latest state
@ -19,7 +20,6 @@ def _filter_statuses(statuses):
class RerunHelper:
def __init__(self, gh, pr_info, check_name):
self.gh = gh
self.pr_info = pr_info
@ -30,6 +30,9 @@ class RerunHelper:
def is_already_finished_by_status(self):
# currently we agree even for failed statuses
for status in self.statuses:
if self.check_name in status.context and status.state in ('success', 'failure'):
if self.check_name in status.context and status.state in (
"success",
"failure",
):
return True
return False
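The rerun helper only needs to know whether a status with the check's name already reached a terminal state. A tiny sketch of that test using `(context, state)` tuples instead of PyGithub status objects; the data is invented.

# Tiny sketch of the finished-status check above, with invented data.
def is_already_finished(statuses, check_name):
    for context, state in statuses:
        if check_name in context and state in ("success", "failure"):
            return True
    return False


if __name__ == "__main__":
    statuses = [("Stateless tests (release, actions)", "success")]
    print(is_already_finished(statuses, "Stateless tests"))  # -> True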

View File

@ -34,30 +34,59 @@ def _flatten_list(lst):
class S3Helper:
def __init__(self, host):
self.session = boto3.session.Session(region_name='us-east-1')
self.client = self.session.client('s3', endpoint_url=host)
self.session = boto3.session.Session(region_name="us-east-1")
self.client = self.session.client("s3", endpoint_url=host)
def _upload_file_to_s3(self, bucket_name, file_path, s3_path):
logging.debug("Start uploading %s to bucket=%s path=%s", file_path, bucket_name, s3_path)
logging.debug(
"Start uploading %s to bucket=%s path=%s", file_path, bucket_name, s3_path
)
metadata = {}
if os.path.getsize(file_path) < 64 * 1024 * 1024:
if s3_path.endswith("txt") or s3_path.endswith("log") or s3_path.endswith("err") or s3_path.endswith("out"):
metadata['ContentType'] = "text/plain; charset=utf-8"
logging.info("Content type %s for file path %s", "text/plain; charset=utf-8", file_path)
if (
s3_path.endswith("txt")
or s3_path.endswith("log")
or s3_path.endswith("err")
or s3_path.endswith("out")
):
metadata["ContentType"] = "text/plain; charset=utf-8"
logging.info(
"Content type %s for file path %s",
"text/plain; charset=utf-8",
file_path,
)
elif s3_path.endswith("html"):
metadata['ContentType'] = "text/html; charset=utf-8"
logging.info("Content type %s for file path %s", "text/html; charset=utf-8", file_path)
metadata["ContentType"] = "text/html; charset=utf-8"
logging.info(
"Content type %s for file path %s",
"text/html; charset=utf-8",
file_path,
)
elif s3_path.endswith("css"):
metadata['ContentType'] = "text/css; charset=utf-8"
logging.info("Content type %s for file path %s", "text/css; charset=utf-8", file_path)
metadata["ContentType"] = "text/css; charset=utf-8"
logging.info(
"Content type %s for file path %s",
"text/css; charset=utf-8",
file_path,
)
elif s3_path.endswith("js"):
metadata['ContentType'] = "text/javascript; charset=utf-8"
logging.info("Content type %s for file path %s", "text/css; charset=utf-8", file_path)
metadata["ContentType"] = "text/javascript; charset=utf-8"
logging.info(
"Content type %s for file path %s",
"text/css; charset=utf-8",
file_path,
)
else:
logging.info("No content type provied for %s", file_path)
else:
if re.search(r'\.(txt|log|err|out)$', s3_path) or re.search(r'\.log\..*(?<!\.gz)$', s3_path):
logging.info("Going to compress file log file %s to %s", file_path, file_path + ".gz")
if re.search(r"\.(txt|log|err|out)$", s3_path) or re.search(
r"\.log\..*(?<!\.gz)$", s3_path
):
logging.info(
"Going to compress file log file %s to %s",
file_path,
file_path + ".gz",
)
compress_file_fast(file_path, file_path + ".gz")
file_path += ".gz"
s3_path += ".gz"
@ -69,14 +98,21 @@ class S3Helper:
logging.info("Upload %s to %s. Meta: %s", file_path, s3_path, metadata)
# last two replacements are specifics of AWS urls:
# https://jamesd3142.wordpress.com/2018/02/28/amazon-s3-and-the-plus-symbol/
return "https://s3.amazonaws.com/{bucket}/{path}".format(bucket=bucket_name, path=s3_path) \
.replace('+', '%2B').replace(' ', '%20')
return (
"https://s3.amazonaws.com/{bucket}/{path}".format(
bucket=bucket_name, path=s3_path
)
.replace("+", "%2B")
.replace(" ", "%20")
)
def upload_test_report_to_s3(self, file_path, s3_path):
if CI:
return self._upload_file_to_s3(S3_TEST_REPORTS_BUCKET, file_path, s3_path)
else:
return S3Helper.copy_file_to_local(S3_TEST_REPORTS_BUCKET, file_path, s3_path)
return S3Helper.copy_file_to_local(
S3_TEST_REPORTS_BUCKET, file_path, s3_path
)
def upload_build_file_to_s3(self, file_path, s3_path):
if CI:
@ -96,6 +132,7 @@ class S3Helper:
counter = 0
t = time.time()
sum_time = 0
def upload_task(file_path):
nonlocal counter
nonlocal t
@ -104,16 +141,18 @@ class S3Helper:
s3_path = file_path.replace(dir_path, s3_dir_path)
metadata = {}
if s3_path.endswith("html"):
metadata['ContentType'] = "text/html; charset=utf-8"
metadata["ContentType"] = "text/html; charset=utf-8"
elif s3_path.endswith("css"):
metadata['ContentType'] = "text/css; charset=utf-8"
metadata["ContentType"] = "text/css; charset=utf-8"
elif s3_path.endswith("js"):
metadata['ContentType'] = "text/javascript; charset=utf-8"
metadata["ContentType"] = "text/javascript; charset=utf-8"
# Retry
for i in range(5):
try:
self.client.upload_file(file_path, bucket_name, s3_path, ExtraArgs=metadata)
self.client.upload_file(
file_path, bucket_name, s3_path, ExtraArgs=metadata
)
break
except Exception as ex:
if i == 4:
@ -123,11 +162,22 @@ class S3Helper:
counter += 1
if counter % 1000 == 0:
sum_time += int(time.time() - t)
print("Uploaded", counter, "-", int(time.time() - t), "s", "sum time", sum_time, "s")
print(
"Uploaded",
counter,
"-",
int(time.time() - t),
"s",
"sum time",
sum_time,
"s",
)
t = time.time()
except Exception as ex:
logging.critical("Failed to upload file, expcetion %s", ex)
return "https://s3.amazonaws.com/{bucket}/{path}".format(bucket=bucket_name, path=s3_path)
return "https://s3.amazonaws.com/{bucket}/{path}".format(
bucket=bucket_name, path=s3_path
)
p = Pool(256)
@ -136,8 +186,20 @@ class S3Helper:
logging.basicConfig(level=logging.INFO)
return result
def _upload_folder_to_s3(self, folder_path, s3_folder_path, bucket_name, keep_dirs_in_s3_path, upload_symlinks):
logging.info("Upload folder '%s' to bucket=%s of s3 folder '%s'", folder_path, bucket_name, s3_folder_path)
def _upload_folder_to_s3(
self,
folder_path,
s3_folder_path,
bucket_name,
keep_dirs_in_s3_path,
upload_symlinks,
):
logging.info(
"Upload folder '%s' to bucket=%s of s3 folder '%s'",
folder_path,
bucket_name,
s3_folder_path,
)
if not os.path.exists(folder_path):
return []
files = os.listdir(folder_path)
@ -154,44 +216,81 @@ class S3Helper:
full_s3_path = s3_folder_path
if os.path.isdir(full_fs_path):
return self._upload_folder_to_s3(full_fs_path, full_s3_path, bucket_name, keep_dirs_in_s3_path,
upload_symlinks)
return self._upload_folder_to_s3(
full_fs_path,
full_s3_path,
bucket_name,
keep_dirs_in_s3_path,
upload_symlinks,
)
if os.path.islink(full_fs_path):
if upload_symlinks:
if CI:
return self._upload_file_to_s3(bucket_name, full_fs_path, full_s3_path + "/" + file_name)
return self._upload_file_to_s3(
bucket_name, full_fs_path, full_s3_path + "/" + file_name
)
else:
return S3Helper.copy_file_to_local(bucket_name, full_fs_path, full_s3_path + "/" + file_name)
return S3Helper.copy_file_to_local(
bucket_name, full_fs_path, full_s3_path + "/" + file_name
)
return []
if CI:
return self._upload_file_to_s3(bucket_name, full_fs_path, full_s3_path + "/" + file_name)
return self._upload_file_to_s3(
bucket_name, full_fs_path, full_s3_path + "/" + file_name
)
else:
return S3Helper.copy_file_to_local(bucket_name, full_fs_path, full_s3_path + "/" + file_name)
return S3Helper.copy_file_to_local(
bucket_name, full_fs_path, full_s3_path + "/" + file_name
)
return sorted(_flatten_list(list(p.map(task, files))))
def upload_build_folder_to_s3(self, folder_path, s3_folder_path, keep_dirs_in_s3_path=True, upload_symlinks=True):
return self._upload_folder_to_s3(folder_path, s3_folder_path, S3_BUILDS_BUCKET, keep_dirs_in_s3_path,
upload_symlinks)
def upload_build_folder_to_s3(
self,
folder_path,
s3_folder_path,
keep_dirs_in_s3_path=True,
upload_symlinks=True,
):
return self._upload_folder_to_s3(
folder_path,
s3_folder_path,
S3_BUILDS_BUCKET,
keep_dirs_in_s3_path,
upload_symlinks,
)
def upload_test_folder_to_s3(self, folder_path, s3_folder_path, keep_dirs_in_s3_path=True, upload_symlinks=True):
return self._upload_folder_to_s3(folder_path, s3_folder_path, S3_TEST_REPORTS_BUCKET, keep_dirs_in_s3_path,
upload_symlinks)
def upload_test_folder_to_s3(
self,
folder_path,
s3_folder_path,
keep_dirs_in_s3_path=True,
upload_symlinks=True,
):
return self._upload_folder_to_s3(
folder_path,
s3_folder_path,
S3_TEST_REPORTS_BUCKET,
keep_dirs_in_s3_path,
upload_symlinks,
)
def list_prefix(self, s3_prefix_path, bucket=S3_BUILDS_BUCKET):
objects = self.client.list_objects_v2(Bucket=bucket, Prefix=s3_prefix_path)
result = []
if 'Contents' in objects:
for obj in objects['Contents']:
result.append(obj['Key'])
if "Contents" in objects:
for obj in objects["Contents"]:
result.append(obj["Key"])
return result
@staticmethod
def copy_file_to_local(bucket_name, file_path, s3_path):
local_path = os.path.abspath(os.path.join(RUNNER_TEMP, 's3', bucket_name, s3_path))
local_path = os.path.abspath(
os.path.join(RUNNER_TEMP, "s3", bucket_name, s3_path)
)
local_dir = os.path.dirname(local_path)
if not os.path.exists(local_dir):
os.makedirs(local_dir)
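`_upload_file_to_s3` makes two decisions before uploading: pick a ContentType from the key suffix for small files, and gzip large plain-text logs. A pure-Python sketch of those two decisions with no S3 call; the 64 MiB threshold mirrors the check above.

# Pure-Python sketch of the ContentType and gzip decisions in _upload_file_to_s3.
import re


def content_type_for(s3_path):
    if s3_path.endswith(("txt", "log", "err", "out")):
        return "text/plain; charset=utf-8"
    if s3_path.endswith("html"):
        return "text/html; charset=utf-8"
    if s3_path.endswith("css"):
        return "text/css; charset=utf-8"
    if s3_path.endswith("js"):
        return "text/javascript; charset=utf-8"
    return None


def should_gzip(s3_path, size_bytes, threshold=64 * 1024 * 1024):
    if size_bytes < threshold:
        return False
    return bool(
        re.search(r"\.(txt|log|err|out)$", s3_path)
        or re.search(r"\.log\..*(?<!\.gz)$", s3_path)
    )


if __name__ == "__main__":
    print(content_type_for("runlog.log"))                # -> text/plain; charset=utf-8
    print(should_gzip("runlog.log", 100 * 1024 * 1024))  # -> True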

View File

@ -23,19 +23,20 @@ from rerun_helper import RerunHelper
DOCKER_IMAGE = "clickhouse/split-build-smoke-test"
DOWNLOAD_RETRIES_COUNT = 5
RESULT_LOG_NAME = "run.log"
CHECK_NAME = 'Split build smoke test (actions)'
CHECK_NAME = "Split build smoke test (actions)"
def process_result(result_folder, server_log_folder):
status = "success"
description = 'Server started and responded'
description = "Server started and responded"
summary = [("Smoke test", "OK")]
with open(os.path.join(result_folder, RESULT_LOG_NAME), 'r') as run_log:
lines = run_log.read().split('\n')
if not lines or lines[0].strip() != 'OK':
with open(os.path.join(result_folder, RESULT_LOG_NAME), "r") as run_log:
lines = run_log.read().split("\n")
if not lines or lines[0].strip() != "OK":
status = "failure"
logging.info("Lines is not ok: %s", str('\n'.join(lines)))
logging.info("Lines is not ok: %s", str("\n".join(lines)))
summary = [("Smoke test", "FAIL")]
description = 'Server failed to respond, see result in logs'
description = "Server failed to respond, see result in logs"
result_logs = []
server_log_path = os.path.join(server_log_folder, "clickhouse-server.log")
@ -43,17 +44,25 @@ def process_result(result_folder, server_log_folder):
client_stderr_log_path = os.path.join(result_folder, "clientstderr.log")
run_log_path = os.path.join(result_folder, RESULT_LOG_NAME)
for path in [server_log_path, stderr_log_path, client_stderr_log_path, run_log_path]:
for path in [
server_log_path,
stderr_log_path,
client_stderr_log_path,
run_log_path,
]:
if os.path.exists(path):
result_logs.append(path)
return status, description, summary, result_logs
def get_run_command(build_path, result_folder, server_log_folder, docker_image):
return f"docker run --network=host --volume={build_path}:/package_folder" \
f" --volume={server_log_folder}:/var/log/clickhouse-server" \
f" --volume={result_folder}:/test_output" \
f" {docker_image} >{result_folder}/{RESULT_LOG_NAME}"
return (
f"docker run --network=host --volume={build_path}:/package_folder"
f" --volume={server_log_folder}:/var/log/clickhouse-server"
f" --volume={result_folder}:/test_output"
f" {docker_image} >{result_folder}/{RESULT_LOG_NAME}"
)
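With hypothetical paths (not taken from the diff), the parenthesised f-string above evaluates to the same single shell command as the old backslash-continued version; only the source layout changes:

# Illustrative call; the folder names are placeholders.
cmd = get_run_command(
    "/tmp/packages", "/tmp/result", "/tmp/server_log", "clickhouse/split-build-smoke-test"
)
# cmd == (
#     "docker run --network=host --volume=/tmp/packages:/package_folder"
#     " --volume=/tmp/server_log:/var/log/clickhouse-server"
#     " --volume=/tmp/result:/test_output"
#     " clickhouse/split-build-smoke-test >/tmp/result/run.log"
# )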
if __name__ == "__main__":
@ -76,8 +85,8 @@ if __name__ == "__main__":
for root, _, files in os.walk(reports_path):
for f in files:
if f == 'changed_images.json':
images_path = os.path.join(root, 'changed_images.json')
if f == "changed_images.json":
images_path = os.path.join(root, "changed_images.json")
break
docker_image = get_image_with_version(reports_path, DOCKER_IMAGE)
@ -96,7 +105,9 @@ if __name__ == "__main__":
if not os.path.exists(result_path):
os.makedirs(result_path)
run_command = get_run_command(packages_path, result_path, server_log_path, docker_image)
run_command = get_run_command(
packages_path, result_path, server_log_path, docker_image
)
logging.info("Going to run command %s", run_command)
with subprocess.Popen(run_command, shell=True) as process:
@ -110,13 +121,30 @@ if __name__ == "__main__":
print("Result path", os.listdir(result_path))
print("Server log path", os.listdir(server_log_path))
state, description, test_results, additional_logs = process_result(result_path, server_log_path)
state, description, test_results, additional_logs = process_result(
result_path, server_log_path
)
ch_helper = ClickHouseHelper()
s3_helper = S3Helper('https://s3.amazonaws.com')
report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_logs, CHECK_NAME)
s3_helper = S3Helper("https://s3.amazonaws.com")
report_url = upload_results(
s3_helper,
pr_info.number,
pr_info.sha,
test_results,
additional_logs,
CHECK_NAME,
)
print(f"::notice ::Report url: {report_url}")
post_commit_status(gh, pr_info.sha, CHECK_NAME, description, state, report_url)
prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, CHECK_NAME)
prepared_events = prepare_tests_results_for_clickhouse(
pr_info,
test_results,
state,
stopwatch.duration_seconds,
stopwatch.start_time_str,
report_url,
CHECK_NAME,
)
ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)


@ -27,15 +27,19 @@ class SSHAgent:
self._env_backup["SSH_OPTIONS"] = os.environ.get("SSH_OPTIONS")
# set ENV from stdout of ssh-agent
for line in self._run(['ssh-agent']).splitlines():
for line in self._run(["ssh-agent"]).splitlines():
name, _, value = line.partition(b"=")
if _ == b"=":
value = value.split(b";", 1)[0]
self._env[name.decode()] = value.decode()
os.environ[name.decode()] = value.decode()
ssh_options = "," + os.environ["SSH_OPTIONS"] if os.environ.get("SSH_OPTIONS") else ""
os.environ["SSH_OPTIONS"] = f"{ssh_options}UserKnownHostsFile=/dev/null,StrictHostKeyChecking=no"
ssh_options = (
"," + os.environ["SSH_OPTIONS"] if os.environ.get("SSH_OPTIONS") else ""
)
os.environ[
"SSH_OPTIONS"
] = f"{ssh_options}UserKnownHostsFile=/dev/null,StrictHostKeyChecking=no"
def add(self, key):
key_pub = self._key_pub(key)
@ -89,7 +93,13 @@ class SSHAgent:
@staticmethod
def _run(cmd, stdin=None):
shell = isinstance(cmd, str)
with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE if stdin else None, shell=shell) as p:
with subprocess.Popen(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
stdin=subprocess.PIPE if stdin else None,
shell=shell,
) as p:
stdout, stderr = p.communicate(stdin)
if stdout.strip().decode() == "The agent has no identities.":
@ -101,6 +111,7 @@ class SSHAgent:
return stdout
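The environment parsing near the top of this class keys off the first '=' in each line of ssh-agent's stdout and keeps only what precedes the first ';' of the value. A self-contained sketch of the same idea, with made-up agent output:

sample = b"SSH_AUTH_SOCK=/tmp/ssh-abc/agent.7; export SSH_AUTH_SOCK;\nSSH_AGENT_PID=7; export SSH_AGENT_PID;\necho Agent pid 7;"
env = {}
for line in sample.splitlines():
    name, sep, value = line.partition(b"=")
    if sep == b"=":  # the "echo Agent pid" line has no '=' and is skipped
        env[name.decode()] = value.split(b";", 1)[0].decode()
assert env == {"SSH_AUTH_SOCK": "/tmp/ssh-abc/agent.7", "SSH_AGENT_PID": "7"}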
class SSHKey:
def __init__(self, key_name=None, key_value=None):
if key_name is None and key_value is None:


@ -2,7 +2,8 @@
import datetime
class Stopwatch():
class Stopwatch:
def __init__(self):
self.start_time = datetime.datetime.utcnow()
self.start_time_str_value = self.start_time.strftime("%Y-%m-%d %H:%M:%S")
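Other hunks in this PR call stopwatch.duration_seconds and stopwatch.start_time_str on instances of this class; those properties lie outside the shown hunk, so the sketch below fills them in with assumed but plausible bodies:

import datetime

class Stopwatch:
    def __init__(self):
        self.start_time = datetime.datetime.utcnow()
        self.start_time_str_value = self.start_time.strftime("%Y-%m-%d %H:%M:%S")

    @property
    def duration_seconds(self):
        # assumed: elapsed wall-clock seconds since construction
        return (datetime.datetime.utcnow() - self.start_time).total_seconds()

    @property
    def start_time_str(self):
        return self.start_time_str_value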


@ -8,18 +8,19 @@ import json
import time
from collections import namedtuple
def get_key_and_app_from_aws():
import boto3
secret_name = "clickhouse_github_secret_key"
session = boto3.session.Session()
client = session.client(
service_name='secretsmanager',
service_name="secretsmanager",
)
get_secret_value_response = client.get_secret_value(
SecretId=secret_name
)
data = json.loads(get_secret_value_response['SecretString'])
return data['clickhouse-app-key'], int(data['clickhouse-app-id'])
get_secret_value_response = client.get_secret_value(SecretId=secret_name)
data = json.loads(get_secret_value_response["SecretString"])
return data["clickhouse-app-key"], int(data["clickhouse-app-id"])
def get_installation_id(jwt_token):
headers = {
@ -29,117 +30,152 @@ def get_installation_id(jwt_token):
response = requests.get("https://api.github.com/app/installations", headers=headers)
response.raise_for_status()
data = response.json()
return data[0]['id']
return data[0]["id"]
def get_access_token(jwt_token, installation_id):
headers = {
"Authorization": f"Bearer {jwt_token}",
"Accept": "application/vnd.github.v3+json",
}
response = requests.post(f"https://api.github.com/app/installations/{installation_id}/access_tokens", headers=headers)
response = requests.post(
f"https://api.github.com/app/installations/{installation_id}/access_tokens",
headers=headers,
)
response.raise_for_status()
data = response.json()
return data['token']
return data["token"]
RunnerDescription = namedtuple('RunnerDescription', ['id', 'name', 'tags', 'offline', 'busy'])
RunnerDescription = namedtuple(
"RunnerDescription", ["id", "name", "tags", "offline", "busy"]
)
def list_runners(access_token):
headers = {
"Authorization": f"token {access_token}",
"Accept": "application/vnd.github.v3+json",
}
response = requests.get("https://api.github.com/orgs/ClickHouse/actions/runners?per_page=100", headers=headers)
response = requests.get(
"https://api.github.com/orgs/ClickHouse/actions/runners?per_page=100",
headers=headers,
)
response.raise_for_status()
data = response.json()
total_runners = data['total_count']
runners = data['runners']
total_runners = data["total_count"]
runners = data["runners"]
total_pages = int(total_runners / 100 + 1)
for i in range(2, total_pages + 1):
response = requests.get(f"https://api.github.com/orgs/ClickHouse/actions/runners?page={i}&per_page=100", headers=headers)
response = requests.get(
f"https://api.github.com/orgs/ClickHouse/actions/runners?page={i}&per_page=100",
headers=headers,
)
response.raise_for_status()
data = response.json()
runners += data['runners']
runners += data["runners"]
print("Total runners", len(runners))
result = []
for runner in runners:
tags = [tag['name'] for tag in runner['labels']]
desc = RunnerDescription(id=runner['id'], name=runner['name'], tags=tags,
offline=runner['status']=='offline', busy=runner['busy'])
tags = [tag["name"] for tag in runner["labels"]]
desc = RunnerDescription(
id=runner["id"],
name=runner["name"],
tags=tags,
offline=runner["status"] == "offline",
busy=runner["busy"],
)
result.append(desc)
return result
def push_metrics_to_cloudwatch(listed_runners, namespace):
import boto3
client = boto3.client('cloudwatch')
client = boto3.client("cloudwatch")
metrics_data = []
busy_runners = sum(1 for runner in listed_runners if runner.busy)
metrics_data.append({
'MetricName': 'BusyRunners',
'Value': busy_runners,
'Unit': 'Count',
})
metrics_data.append(
{
"MetricName": "BusyRunners",
"Value": busy_runners,
"Unit": "Count",
}
)
total_active_runners = sum(1 for runner in listed_runners if not runner.offline)
metrics_data.append({
'MetricName': 'ActiveRunners',
'Value': total_active_runners,
'Unit': 'Count',
})
metrics_data.append(
{
"MetricName": "ActiveRunners",
"Value": total_active_runners,
"Unit": "Count",
}
)
total_runners = len(listed_runners)
metrics_data.append({
'MetricName': 'TotalRunners',
'Value': total_runners,
'Unit': 'Count',
})
metrics_data.append(
{
"MetricName": "TotalRunners",
"Value": total_runners,
"Unit": "Count",
}
)
if total_active_runners == 0:
busy_ratio = 100
else:
busy_ratio = busy_runners / total_active_runners * 100
metrics_data.append({
'MetricName': 'BusyRunnersRatio',
'Value': busy_ratio,
'Unit': 'Percent',
})
metrics_data.append(
{
"MetricName": "BusyRunnersRatio",
"Value": busy_ratio,
"Unit": "Percent",
}
)
client.put_metric_data(Namespace='RunnersMetrics', MetricData=metrics_data)
client.put_metric_data(Namespace="RunnersMetrics", MetricData=metrics_data)
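The ratio branch above reports 100% when no runner is active, presumably so that an empty fleet registers as saturated rather than idle. Pulled out as a tiny function for illustration:

def busy_ratio(busy_runners, total_active_runners):
    # same special case as in push_metrics_to_cloudwatch above
    if total_active_runners == 0:
        return 100
    return busy_runners / total_active_runners * 100

assert busy_ratio(3, 4) == 75.0
assert busy_ratio(0, 0) == 100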
def how_many_instances_to_kill(event_data):
data_array = event_data['CapacityToTerminate']
data_array = event_data["CapacityToTerminate"]
to_kill_by_zone = {}
for av_zone in data_array:
zone_name = av_zone['AvailabilityZone']
to_kill = av_zone['Capacity']
zone_name = av_zone["AvailabilityZone"]
to_kill = av_zone["Capacity"]
if zone_name not in to_kill_by_zone:
to_kill_by_zone[zone_name] = 0
to_kill_by_zone[zone_name] += to_kill
return to_kill_by_zone
def get_candidates_to_be_killed(event_data):
data_array = event_data['Instances']
data_array = event_data["Instances"]
instances_by_zone = {}
for instance in data_array:
zone_name = instance['AvailabilityZone']
instance_id = instance['InstanceId']
zone_name = instance["AvailabilityZone"]
instance_id = instance["InstanceId"]
if zone_name not in instances_by_zone:
instances_by_zone[zone_name] = []
instances_by_zone[zone_name].append(instance_id)
return instances_by_zone
def delete_runner(access_token, runner):
headers = {
"Authorization": f"token {access_token}",
"Accept": "application/vnd.github.v3+json",
}
response = requests.delete(f"https://api.github.com/orgs/ClickHouse/actions/runners/{runner.id}", headers=headers)
response = requests.delete(
f"https://api.github.com/orgs/ClickHouse/actions/runners/{runner.id}",
headers=headers,
)
response.raise_for_status()
print(f"Response code deleting {runner.name} with id {runner.id} is {response.status_code}")
print(
f"Response code deleting {runner.name} with id {runner.id} is {response.status_code}"
)
return response.status_code == 204
@ -166,12 +202,16 @@ def main(github_secret_key, github_app_id, event):
num_to_kill = to_kill_by_zone[zone]
candidates = instances_by_zone[zone]
if num_to_kill > len(candidates):
raise Exception(f"Required to kill {num_to_kill}, but have only {len(candidates)} candidates in AV {zone}")
raise Exception(
f"Required to kill {num_to_kill}, but have only {len(candidates)} candidates in AV {zone}"
)
delete_for_av = []
for candidate in candidates:
if candidate not in set([runner.name for runner in runners]):
print(f"Candidate {candidate} was not in runners list, simply delete it")
print(
f"Candidate {candidate} was not in runners list, simply delete it"
)
instances_to_kill.append(candidate)
for candidate in candidates:
@ -183,57 +223,76 @@ def main(github_secret_key, github_app_id, event):
for runner in runners:
if runner.name == candidate:
if not runner.busy:
print(f"Runner {runner.name} is not busy and can be deleted from AV {zone}")
print(
f"Runner {runner.name} is not busy and can be deleted from AV {zone}"
)
delete_for_av.append(runner)
else:
print(f"Runner {runner.name} is busy, not going to delete it")
break
if len(delete_for_av) < num_to_kill:
print(f"Checked all candidates for av {zone}, get to delete {len(delete_for_av)}, but still cannot get required {num_to_kill}")
print(
f"Checked all candidates for av {zone}, get to delete {len(delete_for_av)}, but still cannot get required {num_to_kill}"
)
to_delete_runners += delete_for_av
print("Got instances to kill: ", ', '.join(instances_to_kill))
print("Going to delete runners:", ', '.join([runner.name for runner in to_delete_runners]))
print("Got instances to kill: ", ", ".join(instances_to_kill))
print(
"Going to delete runners:",
", ".join([runner.name for runner in to_delete_runners]),
)
for runner in to_delete_runners:
if delete_runner(access_token, runner):
print(f"Runner with name {runner.name} and id {runner.id} successfully deleted from github")
print(
f"Runner with name {runner.name} and id {runner.id} successfully deleted from github"
)
instances_to_kill.append(runner.name)
else:
print(f"Cannot delete {runner.name} from github")
## push metrics
#runners = list_runners(access_token)
#push_metrics_to_cloudwatch(runners, 'RunnersMetrics')
# runners = list_runners(access_token)
# push_metrics_to_cloudwatch(runners, 'RunnersMetrics')
response = {
"InstanceIDs": instances_to_kill
}
response = {"InstanceIDs": instances_to_kill}
print(response)
return response
def handler(event, context):
private_key, app_id = get_key_and_app_from_aws()
return main(private_key, app_id, event)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Get list of runners and their states')
parser.add_argument('-p', '--private-key-path', help='Path to file with private key')
parser.add_argument('-k', '--private-key', help='Private key')
parser.add_argument('-a', '--app-id', type=int, help='GitHub application ID', required=True)
parser = argparse.ArgumentParser(description="Get list of runners and their states")
parser.add_argument(
"-p", "--private-key-path", help="Path to file with private key"
)
parser.add_argument("-k", "--private-key", help="Private key")
parser.add_argument(
"-a", "--app-id", type=int, help="GitHub application ID", required=True
)
args = parser.parse_args()
if not args.private_key_path and not args.private_key:
print("Either --private-key-path or --private-key must be specified", file=sys.stderr)
print(
"Either --private-key-path or --private-key must be specified",
file=sys.stderr,
)
if args.private_key_path and args.private_key:
print("Only one of --private-key-path or --private-key should be specified", file=sys.stderr)
print(
"Only one of --private-key-path or --private-key should be specified",
file=sys.stderr,
)
if args.private_key:
private_key = args.private_key
else:
with open(args.private_key_path, 'r') as key_file:
with open(args.private_key_path, "r") as key_file:
private_key = key_file.read()
sample_event = {
@ -243,41 +302,41 @@ if __name__ == "__main__":
{
"AvailabilityZone": "us-east-1b",
"Capacity": 1,
"InstanceMarketOption": "OnDemand"
"InstanceMarketOption": "OnDemand",
},
{
"AvailabilityZone": "us-east-1c",
"Capacity": 2,
"InstanceMarketOption": "OnDemand"
}
"InstanceMarketOption": "OnDemand",
},
],
"Instances": [
{
"AvailabilityZone": "us-east-1b",
"InstanceId": "i-08d0b3c1a137e02a5",
"InstanceType": "t2.nano",
"InstanceMarketOption": "OnDemand"
"InstanceMarketOption": "OnDemand",
},
{
"AvailabilityZone": "us-east-1c",
"InstanceId": "ip-172-31-45-253.eu-west-1.compute.internal",
"InstanceType": "t2.nano",
"InstanceMarketOption": "OnDemand"
"InstanceMarketOption": "OnDemand",
},
{
"AvailabilityZone": "us-east-1c",
"InstanceId": "ip-172-31-27-227.eu-west-1.compute.internal",
"InstanceType": "t2.nano",
"InstanceMarketOption": "OnDemand"
"InstanceMarketOption": "OnDemand",
},
{
"AvailabilityZone": "us-east-1c",
"InstanceId": "ip-172-31-45-253.eu-west-1.compute.internal",
"InstanceType": "t2.nano",
"InstanceMarketOption": "OnDemand"
}
"InstanceMarketOption": "OnDemand",
},
],
"Cause": "SCALE_IN"
"Cause": "SCALE_IN",
}
main(private_key, args.app_id, sample_event)
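As a quick sanity check of the two helpers defined earlier in this file (assuming the truncated top of sample_event carries the CapacityToTerminate and Instances keys those helpers read):

assert how_many_instances_to_kill(sample_event) == {"us-east-1b": 1, "us-east-1c": 2}
assert get_candidates_to_be_killed(sample_event) == {
    "us-east-1b": ["i-08d0b3c1a137e02a5"],
    "us-east-1c": [
        "ip-172-31-45-253.eu-west-1.compute.internal",
        "ip-172-31-27-227.eu-west-1.compute.internal",
        "ip-172-31-45-253.eu-west-1.compute.internal",
    ],
}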


@ -7,6 +7,7 @@ import sys
import json
import time
def get_installation_id(jwt_token):
headers = {
"Authorization": f"Bearer {jwt_token}",
@ -15,40 +16,48 @@ def get_installation_id(jwt_token):
response = requests.get("https://api.github.com/app/installations", headers=headers)
response.raise_for_status()
data = response.json()
return data[0]['id']
return data[0]["id"]
def get_access_token(jwt_token, installation_id):
headers = {
"Authorization": f"Bearer {jwt_token}",
"Accept": "application/vnd.github.v3+json",
}
response = requests.post(f"https://api.github.com/app/installations/{installation_id}/access_tokens", headers=headers)
response = requests.post(
f"https://api.github.com/app/installations/{installation_id}/access_tokens",
headers=headers,
)
response.raise_for_status()
data = response.json()
return data['token']
return data["token"]
def get_runner_registration_token(access_token):
headers = {
"Authorization": f"token {access_token}",
"Accept": "application/vnd.github.v3+json",
}
response = requests.post("https://api.github.com/orgs/ClickHouse/actions/runners/registration-token", headers=headers)
response = requests.post(
"https://api.github.com/orgs/ClickHouse/actions/runners/registration-token",
headers=headers,
)
response.raise_for_status()
data = response.json()
return data['token']
return data["token"]
def get_key_and_app_from_aws():
import boto3
secret_name = "clickhouse_github_secret_key"
session = boto3.session.Session()
client = session.client(
service_name='secretsmanager',
service_name="secretsmanager",
)
get_secret_value_response = client.get_secret_value(
SecretId=secret_name
)
data = json.loads(get_secret_value_response['SecretString'])
return data['clickhouse-app-key'], int(data['clickhouse-app-id'])
get_secret_value_response = client.get_secret_value(SecretId=secret_name)
data = json.loads(get_secret_value_response["SecretString"])
return data["clickhouse-app-key"], int(data["clickhouse-app-id"])
def main(github_secret_key, github_app_id, push_to_ssm, ssm_parameter_name):
@ -67,40 +76,65 @@ def main(github_secret_key, github_app_id, push_to_ssm, ssm_parameter_name):
import boto3
print("Trying to put params into ssm manager")
client = boto3.client('ssm')
client = boto3.client("ssm")
client.put_parameter(
Name=ssm_parameter_name,
Value=runner_registration_token,
Type='SecureString',
Overwrite=True)
Type="SecureString",
Overwrite=True,
)
else:
print("Not pushing token to AWS Parameter Store, just printing:", runner_registration_token)
print(
"Not pushing token to AWS Parameter Store, just printing:",
runner_registration_token,
)
def handler(event, context):
private_key, app_id = get_key_and_app_from_aws()
main(private_key, app_id, True, 'github_runner_registration_token')
main(private_key, app_id, True, "github_runner_registration_token")
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Get new token from github to add runners')
parser.add_argument('-p', '--private-key-path', help='Path to file with private key')
parser.add_argument('-k', '--private-key', help='Private key')
parser.add_argument('-a', '--app-id', type=int, help='GitHub application ID', required=True)
parser.add_argument('--push-to-ssm', action='store_true', help='Store received token in parameter store')
parser.add_argument('--ssm-parameter-name', default='github_runner_registration_token', help='AWS parameter store parameter name')
parser = argparse.ArgumentParser(
description="Get new token from github to add runners"
)
parser.add_argument(
"-p", "--private-key-path", help="Path to file with private key"
)
parser.add_argument("-k", "--private-key", help="Private key")
parser.add_argument(
"-a", "--app-id", type=int, help="GitHub application ID", required=True
)
parser.add_argument(
"--push-to-ssm",
action="store_true",
help="Store received token in parameter store",
)
parser.add_argument(
"--ssm-parameter-name",
default="github_runner_registration_token",
help="AWS parameter store parameter name",
)
args = parser.parse_args()
if not args.private_key_path and not args.private_key:
print("Either --private-key-path or --private-key must be specified", file=sys.stderr)
print(
"Either --private-key-path or --private-key must be specified",
file=sys.stderr,
)
if args.private_key_path and args.private_key:
print("Only one of --private-key-path or --private-key should be specified", file=sys.stderr)
print(
"Only one of --private-key-path or --private-key should be specified",
file=sys.stderr,
)
if args.private_key:
private_key = args.private_key
else:
with open(args.private_key_path, 'r') as key_file:
with open(args.private_key_path, "r") as key_file:
private_key = key_file.read()
main(private_key, args.app_id, args.push_to_ssm, args.ssm_parameter_name)
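For context, the helpers above chain together as follows; the JWT itself is produced outside the shown hunks (it is signed with the GitHub App private key), so jwt_token here is an assumed input:

# jwt_token: assumed to be a JWT signed with the GitHub App private key
installation_id = get_installation_id(jwt_token)
access_token = get_access_token(jwt_token, installation_id)
runner_registration_token = get_runner_registration_token(access_token)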


@ -15,32 +15,38 @@ from build_download_helper import download_unit_tests
from upload_result_helper import upload_results
from docker_pull_helper import get_image_with_version
from commit_status_helper import post_commit_status
from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse
from clickhouse_helper import (
ClickHouseHelper,
mark_flaky_tests,
prepare_tests_results_for_clickhouse,
)
from stopwatch import Stopwatch
from rerun_helper import RerunHelper
from tee_popen import TeePopen
IMAGE_NAME = 'clickhouse/unit-test'
IMAGE_NAME = "clickhouse/unit-test"
def get_test_name(line):
elements = reversed(line.split(' '))
elements = reversed(line.split(" "))
for element in elements:
if '(' not in element and ')' not in element:
if "(" not in element and ")" not in element:
return element
raise Exception(f"No test name in line '{line}'")
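get_test_name walks the whitespace-split tokens of a googletest-style status line from the right and returns the first one that contains no parentheses. A hypothetical example, not taken from a real log:

line = "[       OK ] DateTimeTest.ParseBasic (3 ms)"
assert get_test_name(line) == "DateTimeTest.ParseBasic"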
def process_result(result_folder):
OK_SIGN = 'OK ]'
FAILED_SIGN = 'FAILED ]'
SEGFAULT = 'Segmentation fault'
SIGNAL = 'received signal SIG'
PASSED = 'PASSED'
OK_SIGN = "OK ]"
FAILED_SIGN = "FAILED ]"
SEGFAULT = "Segmentation fault"
SIGNAL = "received signal SIG"
PASSED = "PASSED"
summary = []
total_counter = 0
failed_counter = 0
result_log_path = f'{result_folder}/test_result.txt'
result_log_path = f"{result_folder}/test_result.txt"
if not os.path.exists(result_log_path):
logging.info("No output log on path %s", result_log_path)
return "error", "No output log", summary, []
@ -48,7 +54,7 @@ def process_result(result_folder):
status = "success"
description = ""
passed = False
with open(result_log_path, 'r', encoding='utf-8') as test_result:
with open(result_log_path, "r", encoding="utf-8") as test_result:
for line in test_result:
if OK_SIGN in line:
logging.info("Found ok line: '%s'", line)
@ -56,7 +62,7 @@ def process_result(result_folder):
logging.info("Test name: '%s'", test_name)
summary.append((test_name, "OK"))
total_counter += 1
elif FAILED_SIGN in line and 'listed below' not in line and 'ms)' in line:
elif FAILED_SIGN in line and "listed below" not in line and "ms)" in line:
logging.info("Found fail line: '%s'", line)
test_name = get_test_name(line.strip())
logging.info("Test name: '%s'", test_name)
@ -85,7 +91,9 @@ def process_result(result_folder):
status = "failure"
if not description:
description += f"fail: {failed_counter}, passed: {total_counter - failed_counter}"
description += (
f"fail: {failed_counter}, passed: {total_counter - failed_counter}"
)
return status, description, summary, [result_log_path]
@ -139,15 +147,30 @@ if __name__ == "__main__":
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
s3_helper = S3Helper('https://s3.amazonaws.com')
s3_helper = S3Helper("https://s3.amazonaws.com")
state, description, test_results, additional_logs = process_result(test_output)
ch_helper = ClickHouseHelper()
mark_flaky_tests(ch_helper, check_name, test_results)
report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [run_log_path] + additional_logs, check_name)
report_url = upload_results(
s3_helper,
pr_info.number,
pr_info.sha,
test_results,
[run_log_path] + additional_logs,
check_name,
)
print(f"::notice ::Report url: {report_url}")
post_commit_status(gh, pr_info.sha, check_name, description, state, report_url)
prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name)
prepared_events = prepare_tests_results_for_clickhouse(
pr_info,
test_results,
state,
stopwatch.duration_seconds,
stopwatch.start_time_str,
report_url,
check_name,
)
ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)


@ -6,7 +6,9 @@ from env_helper import GITHUB_SERVER_URL, GITHUB_REPOSITORY, GITHUB_RUN_ID
from report import ReportColorTheme, create_test_html_report
def process_logs(s3_client, additional_logs, s3_path_prefix, test_results, with_raw_logs):
def process_logs(
s3_client, additional_logs, s3_path_prefix, test_results, with_raw_logs
):
processed_logs = {}
# Firstly convert paths of logs from test_results to urls to s3.
for test_result in test_results:
@ -21,8 +23,8 @@ def process_logs(s3_client, additional_logs, s3_path_prefix, test_results, with_
test_log_urls.append(processed_logs[log_path])
elif log_path:
url = s3_client.upload_test_report_to_s3(
log_path,
s3_path_prefix + "/" + os.path.basename(log_path))
log_path, s3_path_prefix + "/" + os.path.basename(log_path)
)
test_log_urls.append(url)
processed_logs[log_path] = url
@ -33,15 +35,29 @@ def process_logs(s3_client, additional_logs, s3_path_prefix, test_results, with_
if log_path:
additional_urls.append(
s3_client.upload_test_report_to_s3(
log_path,
s3_path_prefix + "/" + os.path.basename(log_path)))
log_path, s3_path_prefix + "/" + os.path.basename(log_path)
)
)
return additional_urls
def upload_results(s3_client, pr_number, commit_sha, test_results, additional_files, check_name, with_raw_logs=True, statuscolors=None):
s3_path_prefix = f"{pr_number}/{commit_sha}/" + check_name.lower().replace(' ', '_').replace('(', '_').replace(')', '_').replace(',', '_')
additional_urls = process_logs(s3_client, additional_files, s3_path_prefix, test_results, with_raw_logs)
def upload_results(
s3_client,
pr_number,
commit_sha,
test_results,
additional_files,
check_name,
with_raw_logs=True,
statuscolors=None,
):
s3_path_prefix = f"{pr_number}/{commit_sha}/" + check_name.lower().replace(
" ", "_"
).replace("(", "_").replace(")", "_").replace(",", "_")
additional_urls = process_logs(
s3_client, additional_files, s3_path_prefix, test_results, with_raw_logs
)
branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commits/master"
branch_name = "master"
@ -58,14 +74,25 @@ def upload_results(s3_client, pr_number, commit_sha, test_results, additional_fi
else:
raw_log_url = task_url
statuscolors = ReportColorTheme.bugfixcheck if 'bugfix validate check' in check_name else None
statuscolors = (
ReportColorTheme.bugfixcheck if "bugfix validate check" in check_name else None
)
html_report = create_test_html_report(check_name, test_results, raw_log_url,
task_url, branch_url, branch_name, commit_url,
additional_urls, with_raw_logs, statuscolors=statuscolors)
with open('report.html', 'w', encoding='utf-8') as f:
html_report = create_test_html_report(
check_name,
test_results,
raw_log_url,
task_url,
branch_url,
branch_name,
commit_url,
additional_urls,
with_raw_logs,
statuscolors=statuscolors,
)
with open("report.html", "w", encoding="utf-8") as f:
f.write(html_report)
url = s3_client.upload_test_report_to_s3('report.html', s3_path_prefix + ".html")
url = s3_client.upload_test_report_to_s3("report.html", s3_path_prefix + ".html")
logging.info("Search result in url %s", url)
return url
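For a concrete feel of the s3_path_prefix expression above, take the CHECK_NAME used by the smoke-test script earlier in this diff; the pr_number and commit_sha parts are placeholders:

check_name = "Split build smoke test (actions)"
prefix = "123/deadbeef/" + check_name.lower().replace(" ", "_").replace("(", "_").replace(")", "_").replace(",", "_")
assert prefix == "123/deadbeef/split_build_smoke_test__actions_"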


@ -27,7 +27,7 @@ MAX_TIME_SECONDS = 3600
MAX_TIME_IN_SANDBOX = 20 * 60 # 20 minutes
TASK_TIMEOUT = 8 * 60 * 60 # 8 hours
NO_CHANGES_MSG = 'Nothing to run'
NO_CHANGES_MSG = "Nothing to run"
def stringhash(s):
@ -209,7 +209,9 @@ class ClickhouseIntegrationTestsRunner:
self.image_versions = self.params["docker_images_with_versions"]
self.shuffle_groups = self.params["shuffle_test_groups"]
self.flaky_check = "flaky check" in self.params["context_name"]
self.bugfix_validate_check = "bugfix validate check" in self.params["context_name"]
self.bugfix_validate_check = (
"bugfix validate check" in self.params["context_name"]
)
# if use_tmpfs is not set we assume it to be true, otherwise check
self.use_tmpfs = "use_tmpfs" not in self.params or self.params["use_tmpfs"]
self.disable_net_host = (
@ -780,7 +782,9 @@ class ClickhouseIntegrationTestsRunner:
def run_impl(self, repo_path, build_path):
if self.flaky_check or self.bugfix_validate_check:
return self.run_flaky_check(repo_path, build_path, should_fail=self.bugfix_validate_check)
return self.run_flaky_check(
repo_path, build_path, should_fail=self.bugfix_validate_check
)
self._install_clickhouse(build_path)
logging.info(


@ -5,23 +5,34 @@ import os
from helpers.test_tools import TSV
from helpers.network import _NetworkManager
@pytest.fixture(autouse=True, scope="session")
def cleanup_environment():
try:
if int(os.environ.get("PYTEST_CLEANUP_CONTAINERS", 0)) == 1:
logging.debug(f"Cleaning all iptables rules")
_NetworkManager.clean_all_user_iptables_rules()
result = run_and_check(['docker ps | wc -l'], shell=True)
result = run_and_check(["docker ps | wc -l"], shell=True)
if int(result) > 1:
if int(os.environ.get("PYTEST_CLEANUP_CONTAINERS", 0)) != 1:
logging.warning(f"Docker containers({int(result)}) are running before tests run. They can be left from previous pytest run and cause test failures.\n"\
"You can set env PYTEST_CLEANUP_CONTAINERS=1 or use runner with --cleanup-containers argument to enable automatic containers cleanup.")
logging.warning(
f"Docker containers({int(result)}) are running before tests run. They can be left from previous pytest run and cause test failures.\n"
"You can set env PYTEST_CLEANUP_CONTAINERS=1 or use runner with --cleanup-containers argument to enable automatic containers cleanup."
)
else:
logging.debug("Trying to kill unstopped containers...")
run_and_check([f'docker kill $(docker container list --all --quiet)'], shell=True, nothrow=True)
run_and_check([f'docker rm $(docker container list --all --quiet)'], shell=True, nothrow=True)
run_and_check(
[f"docker kill $(docker container list --all --quiet)"],
shell=True,
nothrow=True,
)
run_and_check(
[f"docker rm $(docker container list --all --quiet)"],
shell=True,
nothrow=True,
)
logging.debug("Unstopped containers killed")
r = run_and_check(['docker-compose', 'ps', '--services', '--all'])
r = run_and_check(["docker-compose", "ps", "--services", "--all"])
logging.debug(f"Docker ps before start:{r.stdout}")
else:
logging.debug(f"No running containers")
@ -31,8 +42,14 @@ def cleanup_environment():
yield
def pytest_addoption(parser):
parser.addoption("--run-id", default="", help="run-id is used as postfix in _instances_{} directory")
parser.addoption(
"--run-id",
default="",
help="run-id is used as postfix in _instances_{} directory",
)
def pytest_configure(config):
os.environ['INTEGRATION_TESTS_RUN_ID'] = config.option.run_id
os.environ["INTEGRATION_TESTS_RUN_ID"] = config.option.run_id


@ -6,79 +6,117 @@ from threading import Timer
class Client:
def __init__(self, host, port=9000, command='/usr/bin/clickhouse-client'):
def __init__(self, host, port=9000, command="/usr/bin/clickhouse-client"):
self.host = host
self.port = port
self.command = [command]
if os.path.basename(command) == 'clickhouse':
self.command.append('client')
if os.path.basename(command) == "clickhouse":
self.command.append("client")
self.command += ['--host', self.host, '--port', str(self.port), '--stacktrace']
self.command += ["--host", self.host, "--port", str(self.port), "--stacktrace"]
def query(self, sql,
stdin=None,
timeout=None,
settings=None,
user=None,
password=None,
database=None,
ignore_error=False,
query_id=None):
return self.get_query_request(sql,
stdin=stdin,
timeout=timeout,
settings=settings,
user=user,
password=password,
database=database,
ignore_error=ignore_error,
query_id=query_id).get_answer()
def query(
self,
sql,
stdin=None,
timeout=None,
settings=None,
user=None,
password=None,
database=None,
ignore_error=False,
query_id=None,
):
return self.get_query_request(
sql,
stdin=stdin,
timeout=timeout,
settings=settings,
user=user,
password=password,
database=database,
ignore_error=ignore_error,
query_id=query_id,
).get_answer()
def get_query_request(self, sql,
stdin=None,
timeout=None,
settings=None,
user=None,
password=None,
database=None,
ignore_error=False,
query_id=None):
def get_query_request(
self,
sql,
stdin=None,
timeout=None,
settings=None,
user=None,
password=None,
database=None,
ignore_error=False,
query_id=None,
):
command = self.command[:]
if stdin is None:
command += ['--multiquery', '--testmode']
command += ["--multiquery", "--testmode"]
stdin = sql
else:
command += ['--query', sql]
command += ["--query", sql]
if settings is not None:
for setting, value in settings.items():
command += ['--' + setting, str(value)]
command += ["--" + setting, str(value)]
if user is not None:
command += ['--user', user]
command += ["--user", user]
if password is not None:
command += ['--password', password]
command += ["--password", password]
if database is not None:
command += ['--database', database]
command += ["--database", database]
if query_id is not None:
command += ['--query_id', query_id]
command += ["--query_id", query_id]
return CommandRequest(command, stdin, timeout, ignore_error)
def query_and_get_error(self, sql, stdin=None, timeout=None, settings=None, user=None, password=None,
database=None):
return self.get_query_request(sql, stdin=stdin, timeout=timeout, settings=settings, user=user,
password=password, database=database).get_error()
def query_and_get_error(
self,
sql,
stdin=None,
timeout=None,
settings=None,
user=None,
password=None,
database=None,
):
return self.get_query_request(
sql,
stdin=stdin,
timeout=timeout,
settings=settings,
user=user,
password=password,
database=database,
).get_error()
def query_and_get_answer_with_error(self, sql, stdin=None, timeout=None, settings=None, user=None, password=None,
database=None):
return self.get_query_request(sql, stdin=stdin, timeout=timeout, settings=settings, user=user,
password=password, database=database).get_answer_and_error()
def query_and_get_answer_with_error(
self,
sql,
stdin=None,
timeout=None,
settings=None,
user=None,
password=None,
database=None,
):
return self.get_query_request(
sql,
stdin=stdin,
timeout=timeout,
settings=settings,
user=user,
password=password,
database=database,
).get_answer_and_error()
class QueryTimeoutExceedException(Exception):
@ -95,7 +133,7 @@ class QueryRuntimeException(Exception):
class CommandRequest:
def __init__(self, command, stdin=None, timeout=None, ignore_error=False):
# Write data to tmp file to avoid PIPEs and execution blocking
stdin_file = tempfile.TemporaryFile(mode='w+')
stdin_file = tempfile.TemporaryFile(mode="w+")
stdin_file.write(stdin)
stdin_file.seek(0)
self.stdout_file = tempfile.TemporaryFile()
@ -108,11 +146,19 @@ class CommandRequest:
# can print some debug information there
env = {}
env["TSAN_OPTIONS"] = "verbosity=0"
self.process = sp.Popen(command, stdin=stdin_file, stdout=self.stdout_file, stderr=self.stderr_file, env=env, universal_newlines=True)
self.process = sp.Popen(
command,
stdin=stdin_file,
stdout=self.stdout_file,
stderr=self.stderr_file,
env=env,
universal_newlines=True,
)
self.timer = None
self.process_finished_before_timeout = True
if timeout is not None:
def kill_process():
if self.process.poll() is None:
self.process_finished_before_timeout = False
@ -126,16 +172,25 @@ class CommandRequest:
self.stdout_file.seek(0)
self.stderr_file.seek(0)
stdout = self.stdout_file.read().decode('utf-8', errors='replace')
stderr = self.stderr_file.read().decode('utf-8', errors='replace')
stdout = self.stdout_file.read().decode("utf-8", errors="replace")
stderr = self.stderr_file.read().decode("utf-8", errors="replace")
if self.timer is not None and not self.process_finished_before_timeout and not self.ignore_error:
if (
self.timer is not None
and not self.process_finished_before_timeout
and not self.ignore_error
):
logging.debug(f"Timed out. Last stdout:{stdout}, stderr:{stderr}")
raise QueryTimeoutExceedException('Client timed out!')
raise QueryTimeoutExceedException("Client timed out!")
if (self.process.returncode != 0 or stderr) and not self.ignore_error:
raise QueryRuntimeException(
'Client failed! Return code: {}, stderr: {}'.format(self.process.returncode, stderr), self.process.returncode, stderr)
"Client failed! Return code: {}, stderr: {}".format(
self.process.returncode, stderr
),
self.process.returncode,
stderr,
)
return stdout
@ -144,14 +199,22 @@ class CommandRequest:
self.stdout_file.seek(0)
self.stderr_file.seek(0)
stdout = self.stdout_file.read().decode('utf-8', errors='replace')
stderr = self.stderr_file.read().decode('utf-8', errors='replace')
stdout = self.stdout_file.read().decode("utf-8", errors="replace")
stderr = self.stderr_file.read().decode("utf-8", errors="replace")
if self.timer is not None and not self.process_finished_before_timeout and not self.ignore_error:
raise QueryTimeoutExceedException('Client timed out!')
if (
self.timer is not None
and not self.process_finished_before_timeout
and not self.ignore_error
):
raise QueryTimeoutExceedException("Client timed out!")
if (self.process.returncode == 0):
raise QueryRuntimeException('Client expected to be failed but succeeded! stdout: {}'.format(stdout), self.process.returncode, stderr)
if self.process.returncode == 0:
raise QueryRuntimeException(
"Client expected to be failed but succeeded! stdout: {}".format(stdout),
self.process.returncode,
stderr,
)
return stderr
@ -160,10 +223,14 @@ class CommandRequest:
self.stdout_file.seek(0)
self.stderr_file.seek(0)
stdout = self.stdout_file.read().decode('utf-8', errors='replace')
stderr = self.stderr_file.read().decode('utf-8', errors='replace')
stdout = self.stdout_file.read().decode("utf-8", errors="replace")
stderr = self.stderr_file.read().decode("utf-8", errors="replace")
if self.timer is not None and not self.process_finished_before_timeout and not self.ignore_error:
raise QueryTimeoutExceedException('Client timed out!')
if (
self.timer is not None
and not self.process_finished_before_timeout
and not self.ignore_error
):
raise QueryTimeoutExceedException("Client timed out!")
return (stdout, stderr)
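A hypothetical usage of the wrapper above; the host, port and binary path are placeholders and assume a locally running server:

client = Client("localhost", port=9000, command="/usr/bin/clickhouse-client")
print(client.query("SELECT 1"))  # blocks until the answer is ready, returns stdout
request = client.get_query_request("SELECT sleep(3)", timeout=1, ignore_error=True)
stdout, stderr = request.get_answer_and_error()  # ignore_error suppresses the timeout exception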

File diff suppressed because it is too large


@ -1,14 +1,29 @@
def corrupt_part_data_on_disk(node, table, part_name):
part_path = node.query("SELECT path FROM system.parts WHERE table = '{}' and name = '{}'"
.format(table, part_name)).strip()
part_path = node.query(
"SELECT path FROM system.parts WHERE table = '{}' and name = '{}'".format(
table, part_name
)
).strip()
corrupt_part_data_by_path(node, part_path)
def corrupt_part_data_by_path(node, part_path):
print("Corrupting part", part_path, "at", node.name)
print("Will corrupt: ",
node.exec_in_container(['bash', '-c', 'cd {p} && ls *.bin | head -n 1'.format(p=part_path)]))
print(
"Will corrupt: ",
node.exec_in_container(
["bash", "-c", "cd {p} && ls *.bin | head -n 1".format(p=part_path)]
),
)
node.exec_in_container(['bash', '-c',
'cd {p} && ls *.bin | head -n 1 | xargs -I{{}} sh -c \'echo "1" >> $1\' -- {{}}'.format(
p=part_path)], privileged=True)
node.exec_in_container(
[
"bash",
"-c",
"cd {p} && ls *.bin | head -n 1 | xargs -I{{}} sh -c 'echo \"1\" >> $1' -- {{}}".format(
p=part_path
),
],
privileged=True,
)


@ -4,18 +4,18 @@ import copy
class Layout(object):
LAYOUTS_STR_DICT = {
'flat': '<flat/>',
'hashed': '<hashed/>',
'cache': '<cache><size_in_cells>128</size_in_cells></cache>',
'ssd_cache': '<ssd_cache><path>/etc/clickhouse-server/dictionaries/all</path></ssd_cache>',
'complex_key_hashed': '<complex_key_hashed/>',
'complex_key_hashed_one_key': '<complex_key_hashed/>',
'complex_key_hashed_two_keys': '<complex_key_hashed/>',
'complex_key_cache': '<complex_key_cache><size_in_cells>128</size_in_cells></complex_key_cache>',
'complex_key_ssd_cache': '<complex_key_ssd_cache><path>/etc/clickhouse-server/dictionaries/all</path></complex_key_ssd_cache>',
'range_hashed': '<range_hashed/>',
'direct': '<direct/>',
'complex_key_direct': '<complex_key_direct/>'
"flat": "<flat/>",
"hashed": "<hashed/>",
"cache": "<cache><size_in_cells>128</size_in_cells></cache>",
"ssd_cache": "<ssd_cache><path>/etc/clickhouse-server/dictionaries/all</path></ssd_cache>",
"complex_key_hashed": "<complex_key_hashed/>",
"complex_key_hashed_one_key": "<complex_key_hashed/>",
"complex_key_hashed_two_keys": "<complex_key_hashed/>",
"complex_key_cache": "<complex_key_cache><size_in_cells>128</size_in_cells></complex_key_cache>",
"complex_key_ssd_cache": "<complex_key_ssd_cache><path>/etc/clickhouse-server/dictionaries/all</path></complex_key_ssd_cache>",
"range_hashed": "<range_hashed/>",
"direct": "<direct/>",
"complex_key_direct": "<complex_key_direct/>",
}
def __init__(self, name):
@ -23,14 +23,14 @@ class Layout(object):
self.is_complex = False
self.is_simple = False
self.is_ranged = False
if self.name.startswith('complex'):
self.layout_type = 'complex'
if self.name.startswith("complex"):
self.layout_type = "complex"
self.is_complex = True
elif name.startswith('range'):
self.layout_type = 'ranged'
elif name.startswith("range"):
self.layout_type = "ranged"
self.is_ranged = True
else:
self.layout_type = 'simple'
self.layout_type = "simple"
self.is_simple = True
def get_str(self):
@ -38,8 +38,8 @@ class Layout(object):
def get_key_block_name(self):
if self.is_complex:
return 'key'
return 'id'
return "key"
return "id"
class Row(object):
@ -59,8 +59,17 @@ class Row(object):
class Field(object):
def __init__(self, name, field_type, is_key=False, is_range_key=False, default=None, hierarchical=False,
range_hash_type=None, default_value_for_get=None):
def __init__(
self,
name,
field_type,
is_key=False,
is_range_key=False,
default=None,
hierarchical=False,
range_hash_type=None,
default_value_for_get=None,
):
self.name = name
self.field_type = field_type
self.is_key = is_key
@ -72,30 +81,32 @@ class Field(object):
self.default_value_for_get = default_value_for_get
def get_attribute_str(self):
return '''
return """
<attribute>
<name>{name}</name>
<type>{field_type}</type>
<null_value>{default}</null_value>
<hierarchical>{hierarchical}</hierarchical>
</attribute>'''.format(
</attribute>""".format(
name=self.name,
field_type=self.field_type,
default=self.default if self.default else '',
hierarchical='true' if self.hierarchical else 'false',
default=self.default if self.default else "",
hierarchical="true" if self.hierarchical else "false",
)
def get_simple_index_str(self):
return '<name>{name}</name>'.format(name=self.name)
return "<name>{name}</name>".format(name=self.name)
def get_range_hash_str(self):
if not self.range_hash_type:
raise Exception("Field {} is not range hashed".format(self.name))
return '''
return """
<range_{type}>
<name>{name}</name>
</range_{type}>
'''.format(type=self.range_hash_type, name=self.name)
""".format(
type=self.range_hash_type, name=self.name
)
class DictionaryStructure(object):
@ -125,9 +136,14 @@ class DictionaryStructure(object):
if not self.layout.is_complex and len(self.keys) > 1:
raise Exception(
"More than one key {} field in non complex layout {}".format(len(self.keys), self.layout.name))
"More than one key {} field in non complex layout {}".format(
len(self.keys), self.layout.name
)
)
if self.layout.is_ranged and (not self.range_key or len(self.range_fields) != 2):
if self.layout.is_ranged and (
not self.range_key or len(self.range_fields) != 2
):
raise Exception("Inconsistent configuration of ranged dictionary")
def get_structure_str(self):
@ -148,7 +164,7 @@ class DictionaryStructure(object):
for range_field in self.range_fields:
ranged_strs.append(range_field.get_range_hash_str())
return '''
return """
<layout>
{layout_str}
</layout>
@ -158,12 +174,12 @@ class DictionaryStructure(object):
</{key_block_name}>
{range_strs}
{attributes_str}
</structure>'''.format(
</structure>""".format(
layout_str=self.layout.get_str(),
key_block_name=self.layout.get_key_block_name(),
key_str='\n'.join(key_strs),
attributes_str='\n'.join(fields_strs),
range_strs='\n'.join(ranged_strs),
key_str="\n".join(key_strs),
attributes_str="\n".join(fields_strs),
range_strs="\n".join(ranged_strs),
)
def get_ordered_names(self):
@ -179,15 +195,19 @@ class DictionaryStructure(object):
def get_all_fields(self):
return self.keys + self.range_fields + self.ordinary_fields
def _get_dict_get_common_expression(self, dict_name, field, row, or_default, with_type, has):
def _get_dict_get_common_expression(
self, dict_name, field, row, or_default, with_type, has
):
if field in self.keys:
raise Exception("Trying to receive key field {} from dictionary".format(field.name))
raise Exception(
"Trying to receive key field {} from dictionary".format(field.name)
)
if not self.layout.is_complex:
if not or_default:
key_expr = ', toUInt64({})'.format(row.data[self.keys[0].name])
key_expr = ", toUInt64({})".format(row.data[self.keys[0].name])
else:
key_expr = ', toUInt64({})'.format(self.keys[0].default_value_for_get)
key_expr = ", toUInt64({})".format(self.keys[0].default_value_for_get)
else:
key_exprs_strs = []
for key in self.keys:
@ -197,48 +217,57 @@ class DictionaryStructure(object):
val = key.default_value_for_get
if isinstance(val, str):
val = "'" + val + "'"
key_exprs_strs.append('to{type}({value})'.format(type=key.field_type, value=val))
key_expr = ', tuple(' + ','.join(key_exprs_strs) + ')'
key_exprs_strs.append(
"to{type}({value})".format(type=key.field_type, value=val)
)
key_expr = ", tuple(" + ",".join(key_exprs_strs) + ")"
date_expr = ''
date_expr = ""
if self.layout.is_ranged:
val = row.data[self.range_key.name]
if isinstance(val, str):
val = "'" + val + "'"
val = "to{type}({val})".format(type=self.range_key.field_type, val=val)
date_expr = ', ' + val
date_expr = ", " + val
if or_default:
raise Exception("Can create 'dictGetOrDefault' query for ranged dictionary")
raise Exception(
"Can create 'dictGetOrDefault' query for ranged dictionary"
)
if or_default:
or_default_expr = 'OrDefault'
or_default_expr = "OrDefault"
if field.default_value_for_get is None:
raise Exception(
"Can create 'dictGetOrDefault' query for field {} without default_value_for_get".format(field.name))
"Can create 'dictGetOrDefault' query for field {} without default_value_for_get".format(
field.name
)
)
val = field.default_value_for_get
if isinstance(val, str):
val = "'" + val + "'"
default_value_for_get = ', to{type}({value})'.format(type=field.field_type, value=val)
default_value_for_get = ", to{type}({value})".format(
type=field.field_type, value=val
)
else:
or_default_expr = ''
default_value_for_get = ''
or_default_expr = ""
default_value_for_get = ""
if with_type:
field_type = field.field_type
else:
field_type = ''
field_type = ""
field_name = ", '" + field.name + "'"
if has:
what = "Has"
field_type = ''
or_default = ''
field_name = ''
date_expr = ''
def_for_get = ''
field_type = ""
or_default = ""
field_name = ""
date_expr = ""
def_for_get = ""
else:
what = "Get"
@ -255,28 +284,38 @@ class DictionaryStructure(object):
def get_get_expressions(self, dict_name, field, row):
return [
self._get_dict_get_common_expression(dict_name, field, row, or_default=False, with_type=False, has=False),
self._get_dict_get_common_expression(dict_name, field, row, or_default=False, with_type=True, has=False),
self._get_dict_get_common_expression(
dict_name, field, row, or_default=False, with_type=False, has=False
),
self._get_dict_get_common_expression(
dict_name, field, row, or_default=False, with_type=True, has=False
),
]
def get_get_or_default_expressions(self, dict_name, field, row):
if not self.layout.is_ranged:
return [
self._get_dict_get_common_expression(dict_name, field, row, or_default=True, with_type=False,
has=False),
self._get_dict_get_common_expression(dict_name, field, row, or_default=True, with_type=True, has=False),
self._get_dict_get_common_expression(
dict_name, field, row, or_default=True, with_type=False, has=False
),
self._get_dict_get_common_expression(
dict_name, field, row, or_default=True, with_type=True, has=False
),
]
return []
def get_has_expressions(self, dict_name, field, row):
if not self.layout.is_ranged:
return [self._get_dict_get_common_expression(dict_name, field, row, or_default=False, with_type=False,
has=True)]
return [
self._get_dict_get_common_expression(
dict_name, field, row, or_default=False, with_type=False, has=True
)
]
return []
def get_hierarchical_expressions(self, dict_name, row):
if self.layout.is_simple:
key_expr = 'toUInt64({})'.format(row.data[self.keys[0].name])
key_expr = "toUInt64({})".format(row.data[self.keys[0].name])
return [
"dictGetHierarchy('{dict_name}', {key})".format(
dict_name=dict_name,
@ -288,21 +327,31 @@ class DictionaryStructure(object):
def get_is_in_expressions(self, dict_name, row, parent_row):
if self.layout.is_simple:
child_key_expr = 'toUInt64({})'.format(row.data[self.keys[0].name])
parent_key_expr = 'toUInt64({})'.format(parent_row.data[self.keys[0].name])
child_key_expr = "toUInt64({})".format(row.data[self.keys[0].name])
parent_key_expr = "toUInt64({})".format(parent_row.data[self.keys[0].name])
return [
"dictIsIn('{dict_name}', {child_key}, {parent_key})".format(
dict_name=dict_name,
child_key=child_key_expr,
parent_key=parent_key_expr, )
parent_key=parent_key_expr,
)
]
return []
class Dictionary(object):
def __init__(self, name, structure, source, config_path,
table_name, fields, min_lifetime=3, max_lifetime=5):
def __init__(
self,
name,
structure,
source,
config_path,
table_name,
fields,
min_lifetime=3,
max_lifetime=5,
):
self.name = name
self.structure = copy.deepcopy(structure)
self.source = copy.deepcopy(source)
@ -313,9 +362,10 @@ class Dictionary(object):
self.max_lifetime = max_lifetime
def generate_config(self):
with open(self.config_path, 'w') as result:
if 'direct' not in self.structure.layout.get_str():
result.write('''
with open(self.config_path, "w") as result:
if "direct" not in self.structure.layout.get_str():
result.write(
"""
<clickhouse>
<dictionary>
<lifetime>
@ -329,15 +379,17 @@ class Dictionary(object):
</source>
</dictionary>
</clickhouse>
'''.format(
min_lifetime=self.min_lifetime,
max_lifetime=self.max_lifetime,
name=self.name,
structure=self.structure.get_structure_str(),
source=self.source.get_source_str(self.table_name),
))
""".format(
min_lifetime=self.min_lifetime,
max_lifetime=self.max_lifetime,
name=self.name,
structure=self.structure.get_structure_str(),
source=self.source.get_source_str(self.table_name),
)
)
else:
result.write('''
result.write(
"""
<clickhouse>
<dictionary>
<name>{name}</name>
@ -347,38 +399,59 @@ class Dictionary(object):
</source>
</dictionary>
</clickhouse>
'''.format(
min_lifetime=self.min_lifetime,
max_lifetime=self.max_lifetime,
name=self.name,
structure=self.structure.get_structure_str(),
source=self.source.get_source_str(self.table_name),
))
""".format(
min_lifetime=self.min_lifetime,
max_lifetime=self.max_lifetime,
name=self.name,
structure=self.structure.get_structure_str(),
source=self.source.get_source_str(self.table_name),
)
)
def prepare_source(self, cluster):
self.source.prepare(self.structure, self.table_name, cluster)
def load_data(self, data):
if not self.source.prepared:
raise Exception("Cannot load data for dictionary {}, source is not prepared".format(self.name))
raise Exception(
"Cannot load data for dictionary {}, source is not prepared".format(
self.name
)
)
self.source.load_data(data, self.table_name)
def get_select_get_queries(self, field, row):
return ['select {}'.format(expr) for expr in self.structure.get_get_expressions(self.name, field, row)]
return [
"select {}".format(expr)
for expr in self.structure.get_get_expressions(self.name, field, row)
]
def get_select_get_or_default_queries(self, field, row):
return ['select {}'.format(expr) for expr in
self.structure.get_get_or_default_expressions(self.name, field, row)]
return [
"select {}".format(expr)
for expr in self.structure.get_get_or_default_expressions(
self.name, field, row
)
]
def get_select_has_queries(self, field, row):
return ['select {}'.format(expr) for expr in self.structure.get_has_expressions(self.name, field, row)]
return [
"select {}".format(expr)
for expr in self.structure.get_has_expressions(self.name, field, row)
]
def get_hierarchical_queries(self, row):
return ['select {}'.format(expr) for expr in self.structure.get_hierarchical_expressions(self.name, row)]
return [
"select {}".format(expr)
for expr in self.structure.get_hierarchical_expressions(self.name, row)
]
def get_is_in_queries(self, row, parent_row):
return ['select {}'.format(expr) for expr in self.structure.get_is_in_expressions(self.name, row, parent_row)]
return [
"select {}".format(expr)
for expr in self.structure.get_is_in_expressions(self.name, row, parent_row)
]
def is_complex(self):
return self.structure.layout.is_complex
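A small worked example of the naming conventions the Layout class encodes (the layout names come from LAYOUTS_STR_DICT above):

assert Layout("complex_key_hashed").layout_type == "complex"
assert Layout("complex_key_hashed").get_key_block_name() == "key"
assert Layout("range_hashed").layout_type == "ranged"
assert Layout("flat").get_key_block_name() == "id"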


@ -11,9 +11,18 @@ import pymysql.cursors
import redis
import logging
class ExternalSource(object):
def __init__(self, name, internal_hostname, internal_port,
docker_hostname, docker_port, user, password):
def __init__(
self,
name,
internal_hostname,
internal_port,
docker_hostname,
docker_port,
user,
password,
):
self.name = name
self.internal_hostname = internal_hostname
self.internal_port = int(internal_port)
@ -23,17 +32,26 @@ class ExternalSource(object):
self.password = password
def get_source_str(self, table_name):
raise NotImplementedError("Method {} is not implemented for {}".format(
"get_source_config_part", self.__class__.__name__))
raise NotImplementedError(
"Method {} is not implemented for {}".format(
"get_source_config_part", self.__class__.__name__
)
)
def prepare(self, structure, table_name, cluster):
raise NotImplementedError("Method {} is not implemented for {}".format(
"prepare_remote_source", self.__class__.__name__))
raise NotImplementedError(
"Method {} is not implemented for {}".format(
"prepare_remote_source", self.__class__.__name__
)
)
# data is banch of Row
def load_data(self, data):
raise NotImplementedError("Method {} is not implemented for {}".format(
"prepare_remote_source", self.__class__.__name__))
raise NotImplementedError(
"Method {} is not implemented for {}".format(
"prepare_remote_source", self.__class__.__name__
)
)
def compatible_with_layout(self, layout):
return True
@ -41,29 +59,32 @@ class ExternalSource(object):
class SourceMySQL(ExternalSource):
TYPE_MAPPING = {
'UInt8': 'tinyint unsigned',
'UInt16': 'smallint unsigned',
'UInt32': 'int unsigned',
'UInt64': 'bigint unsigned',
'Int8': 'tinyint',
'Int16': 'smallint',
'Int32': 'int',
'Int64': 'bigint',
'UUID': 'varchar(36)',
'Date': 'date',
'DateTime': 'datetime',
'String': 'text',
'Float32': 'float',
'Float64': 'double'
"UInt8": "tinyint unsigned",
"UInt16": "smallint unsigned",
"UInt32": "int unsigned",
"UInt64": "bigint unsigned",
"Int8": "tinyint",
"Int16": "smallint",
"Int32": "int",
"Int64": "bigint",
"UUID": "varchar(36)",
"Date": "date",
"DateTime": "datetime",
"String": "text",
"Float32": "float",
"Float64": "double",
}
def create_mysql_conn(self):
logging.debug(f"pymysql connect {self.user}, {self.password}, {self.internal_hostname}, {self.internal_port}")
logging.debug(
f"pymysql connect {self.user}, {self.password}, {self.internal_hostname}, {self.internal_port}"
)
self.connection = pymysql.connect(
user=self.user,
password=self.password,
host=self.internal_hostname,
port=self.internal_port)
port=self.internal_port,
)
def execute_mysql_query(self, query):
with warnings.catch_warnings():
@ -73,7 +94,7 @@ class SourceMySQL(ExternalSource):
self.connection.commit()
def get_source_str(self, table_name):
return '''
return """
<mysql>
<replica>
<priority>1</priority>
@ -89,7 +110,7 @@ class SourceMySQL(ExternalSource):
<password>{password}</password>
<db>test</db>
<table>{tbl}</table>
</mysql>'''.format(
</mysql>""".format(
hostname=self.docker_hostname,
port=self.docker_port,
user=self.user,
@ -101,14 +122,20 @@ class SourceMySQL(ExternalSource):
if self.internal_hostname is None:
self.internal_hostname = cluster.mysql_ip
self.create_mysql_conn()
self.execute_mysql_query("create database if not exists test default character set 'utf8'")
self.execute_mysql_query(
"create database if not exists test default character set 'utf8'"
)
self.execute_mysql_query("drop table if exists test.{}".format(table_name))
fields_strs = []
for field in structure.keys + structure.ordinary_fields + structure.range_fields:
fields_strs.append(field.name + ' ' + self.TYPE_MAPPING[field.field_type])
create_query = '''create table test.{table_name} (
for field in (
structure.keys + structure.ordinary_fields + structure.range_fields
):
fields_strs.append(field.name + " " + self.TYPE_MAPPING[field.field_type])
create_query = """create table test.{table_name} (
{fields_str});
'''.format(table_name=table_name, fields_str=','.join(fields_strs))
""".format(
table_name=table_name, fields_str=",".join(fields_strs)
)
self.execute_mysql_query(create_query)
self.ordered_names = structure.get_ordered_names()
self.prepared = True
@ -126,18 +153,16 @@ class SourceMySQL(ExternalSource):
else:
data = str(data)
sorted_row.append(data)
values_strs.append('(' + ','.join(sorted_row) + ')')
query = 'insert into test.{} ({}) values {}'.format(
table_name,
','.join(self.ordered_names),
','.join(values_strs))
values_strs.append("(" + ",".join(sorted_row) + ")")
query = "insert into test.{} ({}) values {}".format(
table_name, ",".join(self.ordered_names), ",".join(values_strs)
)
self.execute_mysql_query(query)
class SourceMongo(ExternalSource):
def get_source_str(self, table_name):
return '''
return """
<mongodb>
<host>{host}</host>
<port>{port}</port>
@ -146,7 +171,7 @@ class SourceMongo(ExternalSource):
<db>test</db>
<collection>{tbl}</collection>
</mongodb>
'''.format(
""".format(
host=self.docker_hostname,
port=self.docker_port,
user=self.user,
@ -155,22 +180,29 @@ class SourceMongo(ExternalSource):
)
def prepare(self, structure, table_name, cluster):
connection_str = 'mongodb://{user}:{password}@{host}:{port}'.format(
host=self.internal_hostname, port=self.internal_port,
user=self.user, password=self.password)
connection_str = "mongodb://{user}:{password}@{host}:{port}".format(
host=self.internal_hostname,
port=self.internal_port,
user=self.user,
password=self.password,
)
self.connection = pymongo.MongoClient(connection_str)
self.converters = {}
for field in structure.get_all_fields():
if field.field_type == "Date":
self.converters[field.name] = lambda x: datetime.datetime.strptime(x, "%Y-%m-%d")
self.converters[field.name] = lambda x: datetime.datetime.strptime(
x, "%Y-%m-%d"
)
elif field.field_type == "DateTime":
def converter(x):
return datetime.datetime.strptime(x, '%Y-%m-%d %H:%M:%S')
return datetime.datetime.strptime(x, "%Y-%m-%d %H:%M:%S")
self.converters[field.name] = converter
else:
self.converters[field.name] = lambda x: x
self.db = self.connection['test']
self.db = self.connection["test"]
self.db.add_user(self.user, self.password)
self.prepared = True
@ -191,15 +223,15 @@ class SourceMongoURI(SourceMongo):
def compatible_with_layout(self, layout):
# It is enough to test one layout for this dictionary, since we're
# only testing that the connection with URI works.
return layout.name == 'flat'
return layout.name == "flat"
def get_source_str(self, table_name):
return '''
return """
<mongodb>
<uri>mongodb://{user}:{password}@{host}:{port}/test</uri>
<collection>{tbl}</collection>
</mongodb>
'''.format(
""".format(
host=self.docker_hostname,
port=self.docker_port,
user=self.user,
@ -209,9 +241,8 @@ class SourceMongoURI(SourceMongo):
class SourceClickHouse(ExternalSource):
def get_source_str(self, table_name):
return '''
return """
<clickhouse>
<host>{host}</host>
<port>{port}</port>
@ -220,7 +251,7 @@ class SourceClickHouse(ExternalSource):
<db>test</db>
<table>{tbl}</table>
</clickhouse>
'''.format(
""".format(
host=self.docker_hostname,
port=self.docker_port,
user=self.user,
@ -232,11 +263,15 @@ class SourceClickHouse(ExternalSource):
self.node = cluster.instances[self.docker_hostname]
self.node.query("CREATE DATABASE IF NOT EXISTS test")
fields_strs = []
for field in structure.keys + structure.ordinary_fields + structure.range_fields:
fields_strs.append(field.name + ' ' + field.field_type)
create_query = '''CREATE TABLE test.{table_name} (
for field in (
structure.keys + structure.ordinary_fields + structure.range_fields
):
fields_strs.append(field.name + " " + field.field_type)
create_query = """CREATE TABLE test.{table_name} (
{fields_str}) ENGINE MergeTree ORDER BY tuple();
'''.format(table_name=table_name, fields_str=','.join(fields_strs))
""".format(
table_name=table_name, fields_str=",".join(fields_strs)
)
self.node.query(create_query)
self.ordered_names = structure.get_ordered_names()
self.prepared = True
@ -254,31 +289,31 @@ class SourceClickHouse(ExternalSource):
else:
row_data = str(row_data)
sorted_row.append(row_data)
values_strs.append('(' + ','.join(sorted_row) + ')')
query = 'INSERT INTO test.{} ({}) values {}'.format(
table_name,
','.join(self.ordered_names),
','.join(values_strs))
values_strs.append("(" + ",".join(sorted_row) + ")")
query = "INSERT INTO test.{} ({}) values {}".format(
table_name, ",".join(self.ordered_names), ",".join(values_strs)
)
self.node.query(query)
class SourceFile(ExternalSource):
def get_source_str(self, table_name):
table_path = "/" + table_name + ".tsv"
return '''
return """
<file>
<path>{path}</path>
<format>TabSeparated</format>
</file>
'''.format(
""".format(
path=table_path,
)
def prepare(self, structure, table_name, cluster):
self.node = cluster.instances[self.docker_hostname]
path = "/" + table_name + ".tsv"
self.node.exec_in_container(["bash", "-c", "touch {}".format(path)], user="root")
self.node.exec_in_container(
["bash", "-c", "touch {}".format(path)], user="root"
)
self.ordered_names = structure.get_ordered_names()
self.prepared = True
@ -291,35 +326,45 @@ class SourceFile(ExternalSource):
for name in self.ordered_names:
sorted_row.append(str(row.data[name]))
str_data = '\t'.join(sorted_row)
self.node.exec_in_container(["bash", "-c", "echo \"{row}\" >> {fname}".format(row=str_data, fname=path)],
user="root")
str_data = "\t".join(sorted_row)
self.node.exec_in_container(
[
"bash",
"-c",
'echo "{row}" >> {fname}'.format(row=str_data, fname=path),
],
user="root",
)
def compatible_with_layout(self, layout):
return 'cache' not in layout.name and 'direct' not in layout.name
return "cache" not in layout.name and "direct" not in layout.name
class _SourceExecutableBase(ExternalSource):
def _get_cmd(self, path):
raise NotImplementedError("Method {} is not implemented for {}".format(
"_get_cmd", self.__class__.__name__))
raise NotImplementedError(
"Method {} is not implemented for {}".format(
"_get_cmd", self.__class__.__name__
)
)
def get_source_str(self, table_name):
table_path = "/" + table_name + ".tsv"
return '''
return """
<executable>
<command>{cmd}</command>
<format>TabSeparated</format>
</executable>
'''.format(
""".format(
cmd=self._get_cmd(table_path),
)
def prepare(self, structure, table_name, cluster):
self.node = cluster.instances[self.docker_hostname]
path = "/" + table_name + ".tsv"
self.node.exec_in_container(["bash", "-c", "touch {}".format(path)], user="root")
self.node.exec_in_container(
["bash", "-c", "touch {}".format(path)], user="root"
)
self.ordered_names = structure.get_ordered_names()
self.prepared = True
@ -332,27 +377,31 @@ class _SourceExecutableBase(ExternalSource):
for name in self.ordered_names:
sorted_row.append(str(row.data[name]))
str_data = '\t'.join(sorted_row)
self.node.exec_in_container(["bash", "-c", "echo \"{row}\" >> {fname}".format(row=str_data, fname=path)],
user='root')
str_data = "\t".join(sorted_row)
self.node.exec_in_container(
[
"bash",
"-c",
'echo "{row}" >> {fname}'.format(row=str_data, fname=path),
],
user="root",
)
class SourceExecutableHashed(_SourceExecutableBase):
def _get_cmd(self, path):
return "cat {}".format(path)
def compatible_with_layout(self, layout):
return 'hashed' in layout.name
return "hashed" in layout.name
class SourceExecutableCache(_SourceExecutableBase):
def _get_cmd(self, path):
return "cat - >/dev/null;cat {}".format(path)
def compatible_with_layout(self, layout):
return 'cache' in layout.name
return "cache" in layout.name
class SourceHTTPBase(ExternalSource):
@ -360,10 +409,11 @@ class SourceHTTPBase(ExternalSource):
def get_source_str(self, table_name):
self.http_port = SourceHTTPBase.PORT_COUNTER
url = "{schema}://{host}:{port}/".format(schema=self._get_schema(), host=self.docker_hostname,
port=self.http_port)
url = "{schema}://{host}:{port}/".format(
schema=self._get_schema(), host=self.docker_hostname, port=self.http_port
)
SourceHTTPBase.PORT_COUNTER += 1
return '''
return """
<http>
<url>{url}</url>
<format>TabSeparated</format>
@ -378,22 +428,37 @@ class SourceHTTPBase(ExternalSource):
</header>
</headers>
</http>
'''.format(url=url)
""".format(
url=url
)
def prepare(self, structure, table_name, cluster):
self.node = cluster.instances[self.docker_hostname]
path = "/" + table_name + ".tsv"
self.node.exec_in_container(["bash", "-c", "touch {}".format(path)], user='root')
self.node.exec_in_container(
["bash", "-c", "touch {}".format(path)], user="root"
)
script_dir = os.path.dirname(os.path.realpath(__file__))
self.node.copy_file_to_container(os.path.join(script_dir, './http_server.py'), '/http_server.py')
self.node.copy_file_to_container(os.path.join(script_dir, './fake_cert.pem'), '/fake_cert.pem')
self.node.exec_in_container([
"bash",
"-c",
"python3 /http_server.py --data-path={tbl} --schema={schema} --host={host} --port={port} --cert-path=/fake_cert.pem".format(
tbl=path, schema=self._get_schema(), host=self.docker_hostname, port=self.http_port)
], detach=True)
self.node.copy_file_to_container(
os.path.join(script_dir, "./http_server.py"), "/http_server.py"
)
self.node.copy_file_to_container(
os.path.join(script_dir, "./fake_cert.pem"), "/fake_cert.pem"
)
self.node.exec_in_container(
[
"bash",
"-c",
"python3 /http_server.py --data-path={tbl} --schema={schema} --host={host} --port={port} --cert-path=/fake_cert.pem".format(
tbl=path,
schema=self._get_schema(),
host=self.docker_hostname,
port=self.http_port,
),
],
detach=True,
)
self.ordered_names = structure.get_ordered_names()
self.prepared = True
@ -406,9 +471,15 @@ class SourceHTTPBase(ExternalSource):
for name in self.ordered_names:
sorted_row.append(str(row.data[name]))
str_data = '\t'.join(sorted_row)
self.node.exec_in_container(["bash", "-c", "echo \"{row}\" >> {fname}".format(row=str_data, fname=path)],
user='root')
str_data = "\t".join(sorted_row)
self.node.exec_in_container(
[
"bash",
"-c",
'echo "{row}" >> {fname}'.format(row=str_data, fname=path),
],
user="root",
)
class SourceHTTP(SourceHTTPBase):
@ -423,29 +494,46 @@ class SourceHTTPS(SourceHTTPBase):
class SourceCassandra(ExternalSource):
TYPE_MAPPING = {
'UInt8': 'tinyint',
'UInt16': 'smallint',
'UInt32': 'int',
'UInt64': 'bigint',
'Int8': 'tinyint',
'Int16': 'smallint',
'Int32': 'int',
'Int64': 'bigint',
'UUID': 'uuid',
'Date': 'date',
'DateTime': 'timestamp',
'String': 'text',
'Float32': 'float',
'Float64': 'double'
"UInt8": "tinyint",
"UInt16": "smallint",
"UInt32": "int",
"UInt64": "bigint",
"Int8": "tinyint",
"Int16": "smallint",
"Int32": "int",
"Int64": "bigint",
"UUID": "uuid",
"Date": "date",
"DateTime": "timestamp",
"String": "text",
"Float32": "float",
"Float64": "double",
}
def __init__(self, name, internal_hostname, internal_port, docker_hostname, docker_port, user, password):
ExternalSource.__init__(self, name, internal_hostname, internal_port, docker_hostname, docker_port, user,
password)
def __init__(
self,
name,
internal_hostname,
internal_port,
docker_hostname,
docker_port,
user,
password,
):
ExternalSource.__init__(
self,
name,
internal_hostname,
internal_port,
docker_hostname,
docker_port,
user,
password,
)
self.structure = dict()
def get_source_str(self, table_name):
return '''
return """
<cassandra>
<host>{host}</host>
<port>{port}</port>
@ -454,7 +542,7 @@ class SourceCassandra(ExternalSource):
<allow_filtering>1</allow_filtering>
<where>"Int64_" &lt; 1000000000000000000</where>
</cassandra>
'''.format(
""".format(
host=self.docker_hostname,
port=self.docker_port,
table=table_name,
@ -464,49 +552,79 @@ class SourceCassandra(ExternalSource):
if self.internal_hostname is None:
self.internal_hostname = cluster.cassandra_ip
self.client = cassandra.cluster.Cluster([self.internal_hostname], port=self.internal_port)
self.client = cassandra.cluster.Cluster(
[self.internal_hostname], port=self.internal_port
)
self.session = self.client.connect()
self.session.execute(
"create keyspace if not exists test with replication = {'class': 'SimpleStrategy', 'replication_factor' : 1};")
"create keyspace if not exists test with replication = {'class': 'SimpleStrategy', 'replication_factor' : 1};"
)
self.session.execute('drop table if exists test."{}"'.format(table_name))
self.structure[table_name] = structure
columns = ['"' + col.name + '" ' + self.TYPE_MAPPING[col.field_type] for col in structure.get_all_fields()]
columns = [
'"' + col.name + '" ' + self.TYPE_MAPPING[col.field_type]
for col in structure.get_all_fields()
]
keys = ['"' + col.name + '"' for col in structure.keys]
query = 'create table test."{name}" ({columns}, primary key ({pk}));'.format(
name=table_name, columns=', '.join(columns), pk=', '.join(keys))
name=table_name, columns=", ".join(columns), pk=", ".join(keys)
)
self.session.execute(query)
self.prepared = True
def get_value_to_insert(self, value, type):
if type == 'UUID':
if type == "UUID":
return uuid.UUID(value)
elif type == 'DateTime':
return datetime.datetime.strptime(value, '%Y-%m-%d %H:%M:%S')
elif type == "DateTime":
return datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
return value
def load_data(self, data, table_name):
names_and_types = [(field.name, field.field_type) for field in self.structure[table_name].get_all_fields()]
names_and_types = [
(field.name, field.field_type)
for field in self.structure[table_name].get_all_fields()
]
columns = ['"' + col[0] + '"' for col in names_and_types]
insert = 'insert into test."{table}" ({columns}) values ({args})'.format(
table=table_name, columns=','.join(columns), args=','.join(['%s'] * len(columns)))
table=table_name,
columns=",".join(columns),
args=",".join(["%s"] * len(columns)),
)
for row in data:
values = [self.get_value_to_insert(row.get_value_by_name(col[0]), col[1]) for col in names_and_types]
values = [
self.get_value_to_insert(row.get_value_by_name(col[0]), col[1])
for col in names_and_types
]
self.session.execute(insert, values)
class SourceRedis(ExternalSource):
def __init__(
self, name, internal_hostname, internal_port, docker_hostname, docker_port, user, password, db_index,
storage_type
self,
name,
internal_hostname,
internal_port,
docker_hostname,
docker_port,
user,
password,
db_index,
storage_type,
):
super(SourceRedis, self).__init__(
name, internal_hostname, internal_port, docker_hostname, docker_port, user, password
name,
internal_hostname,
internal_port,
docker_hostname,
docker_port,
user,
password,
)
self.storage_type = storage_type
self.db_index = db_index
def get_source_str(self, table_name):
return '''
return """
<redis>
<host>{host}</host>
<port>{port}</port>
@ -514,7 +632,7 @@ class SourceRedis(ExternalSource):
<db_index>{db_index}</db_index>
<storage_type>{storage_type}</storage_type>
</redis>
'''.format(
""".format(
host=self.docker_hostname,
port=self.docker_port,
password=self.password,
@ -523,8 +641,12 @@ class SourceRedis(ExternalSource):
)
def prepare(self, structure, table_name, cluster):
self.client = redis.StrictRedis(host=self.internal_hostname, port=self.internal_port, db=self.db_index,
password=self.password or None)
self.client = redis.StrictRedis(
host=self.internal_hostname,
port=self.internal_port,
db=self.db_index,
password=self.password or None,
)
self.prepared = True
self.ordered_names = structure.get_ordered_names()
@ -540,33 +662,52 @@ class SourceRedis(ExternalSource):
self.client.hset(*values)
def compatible_with_layout(self, layout):
return layout.is_simple and self.storage_type == "simple" or layout.is_complex and self.storage_type == "hash_map"
return (
layout.is_simple
and self.storage_type == "simple"
or layout.is_complex
and self.storage_type == "hash_map"
)
class SourceAerospike(ExternalSource):
def __init__(self, name, internal_hostname, internal_port,
docker_hostname, docker_port, user, password):
ExternalSource.__init__(self, name, internal_hostname, internal_port,
docker_hostname, docker_port, user, password)
def __init__(
self,
name,
internal_hostname,
internal_port,
docker_hostname,
docker_port,
user,
password,
):
ExternalSource.__init__(
self,
name,
internal_hostname,
internal_port,
docker_hostname,
docker_port,
user,
password,
)
self.namespace = "test"
self.set = "test_set"
def get_source_str(self, table_name):
print("AEROSPIKE get source str")
return '''
return """
<aerospike>
<host>{host}</host>
<port>{port}</port>
</aerospike>
'''.format(
""".format(
host=self.docker_hostname,
port=self.docker_port,
)
def prepare(self, structure, table_name, cluster):
config = {
'hosts': [(self.internal_hostname, self.internal_port)]
}
config = {"hosts": [(self.internal_hostname, self.internal_port)]}
self.client = aerospike.client(config).connect()
self.prepared = True
print("PREPARED AEROSPIKE")
@ -601,10 +742,14 @@ class SourceAerospike(ExternalSource):
for value in values:
key = (self.namespace, self.set, value[0])
print(key)
self.client.put(key, {"bin_value": value[1]}, policy={"key": aerospike.POLICY_KEY_SEND})
self.client.put(
key,
{"bin_value": value[1]},
policy={"key": aerospike.POLICY_KEY_SEND},
)
assert self.client.exists(key)
else:
assert ("VALUES SIZE != 2")
assert "VALUES SIZE != 2"
# print(values)
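The sources above all share one template-method shape: the ExternalSource base class raises NotImplementedError for get_source_str(), prepare() and load_data(), and each concrete source (MySQL, Mongo, ClickHouse, file, executable, HTTP, Cassandra, Redis, Aerospike) fills those hooks in. Below is a minimal, self-contained sketch of that pattern; ExampleSource and ExampleFileSource are hypothetical names used only for illustration and are not part of the repository.

class ExampleSource:
    """Hypothetical stand-in for ExternalSource, illustrating the hook pattern."""

    def __init__(self, name, docker_hostname, docker_port):
        self.name = name
        self.docker_hostname = docker_hostname
        self.docker_port = docker_port

    def get_source_str(self, table_name):
        # The base class only declares the hook, exactly like ExternalSource above.
        raise NotImplementedError(
            "Method {} is not implemented for {}".format(
                "get_source_str", self.__class__.__name__
            )
        )


class ExampleFileSource(ExampleSource):
    def get_source_str(self, table_name):
        # A concrete source returns the XML fragment for the dictionary config,
        # mirroring SourceFile above.
        return """
        <file>
            <path>/{tbl}.tsv</path>
            <format>TabSeparated</format>
        </file>
        """.format(
            tbl=table_name
        )


if __name__ == "__main__":
    print(ExampleFileSource("file", "localhost", 0).get_source_str("demo"))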

View File

@ -10,27 +10,44 @@ import socket
import tempfile
import logging
import os
class mk_krb_conf(object):
def __init__(self, krb_conf, kdc_ip):
self.krb_conf = krb_conf
self.kdc_ip = kdc_ip
self.amended_krb_conf = None
def __enter__(self):
with open(self.krb_conf) as f:
content = f.read()
amended_content = content.replace('hdfskerberos', self.kdc_ip)
amended_content = content.replace("hdfskerberos", self.kdc_ip)
self.amended_krb_conf = tempfile.NamedTemporaryFile(delete=False, mode="w+")
self.amended_krb_conf.write(amended_content)
self.amended_krb_conf.close()
return self.amended_krb_conf.name
def __exit__(self, type, value, traceback):
if self.amended_krb_conf is not None:
self.amended_krb_conf.close()
class HDFSApi(object):
def __init__(self, user, host, proxy_port, data_port, timeout=100, kerberized=False, principal=None,
keytab=None, krb_conf=None,
protocol = "http", hdfs_ip = None, kdc_ip = None):
def __init__(
self,
user,
host,
proxy_port,
data_port,
timeout=100,
kerberized=False,
principal=None,
keytab=None,
krb_conf=None,
protocol="http",
hdfs_ip=None,
kdc_ip=None,
):
self.host = host
self.protocol = protocol
self.proxy_port = proxy_port
@ -55,7 +72,11 @@ class HDFSApi(object):
if kerberized:
self._run_kinit()
self.kerberos_auth = reqkerb.HTTPKerberosAuth(mutual_authentication=reqkerb.DISABLED, hostname_override=self.host, principal=self.principal)
self.kerberos_auth = reqkerb.HTTPKerberosAuth(
mutual_authentication=reqkerb.DISABLED,
hostname_override=self.host,
principal=self.principal,
)
if self.kerberos_auth is None:
print("failed to obtain kerberos_auth")
else:
@ -70,7 +91,11 @@ class HDFSApi(object):
os.environ["KRB5_CONFIG"] = instantiated_krb_conf
cmd = "(kinit -R -t {keytab} -k {principal} || (sleep 5 && kinit -R -t {keytab} -k {principal})) ; klist".format(instantiated_krb_conf=instantiated_krb_conf, keytab=self.keytab, principal=self.principal)
cmd = "(kinit -R -t {keytab} -k {principal} || (sleep 5 && kinit -R -t {keytab} -k {principal})) ; klist".format(
instantiated_krb_conf=instantiated_krb_conf,
keytab=self.keytab,
principal=self.principal,
)
start = time.time()
@ -79,10 +104,18 @@ class HDFSApi(object):
res = subprocess.run(cmd, shell=True)
if res.returncode != 0:
# check_call(...) from subprocess does not print stderr, so we do it manually
logging.debug('Stderr:\n{}\n'.format(res.stderr.decode('utf-8')))
logging.debug('Stdout:\n{}\n'.format(res.stdout.decode('utf-8')))
logging.debug('Env:\n{}\n'.format(env))
                        raise Exception('Command {} returned non-zero code {}: {}'.format(args, res.returncode, res.stderr.decode('utf-8')))
logging.debug(
"Stderr:\n{}\n".format(res.stderr.decode("utf-8"))
)
logging.debug(
"Stdout:\n{}\n".format(res.stdout.decode("utf-8"))
)
logging.debug("Env:\n{}\n".format(env))
                        raise Exception(
                            "Command {} returned non-zero code {}: {}".format(
                                args, res.returncode, res.stderr.decode("utf-8")
                            )
                        )
logging.debug("KDC started, kinit successfully run")
return
@ -97,28 +130,60 @@ class HDFSApi(object):
for i in range(0, cnt):
logging.debug(f"CALL: {str(kwargs)}")
response_data = func(**kwargs)
logging.debug(f"response_data:{response_data.content} headers:{response_data.headers}")
logging.debug(
f"response_data:{response_data.content} headers:{response_data.headers}"
)
if response_data.status_code == expected_code:
return response_data
else:
logging.error(f"unexpected response_data.status_code {response_data.status_code} != {expected_code}")
logging.error(
f"unexpected response_data.status_code {response_data.status_code} != {expected_code}"
)
time.sleep(1)
response_data.raise_for_status()
def read_data(self, path, universal_newlines=True):
logging.debug("read_data protocol:{} host:{} ip:{} proxy port:{} data port:{} path: {}".format(self.protocol, self.host, self.hdfs_ip, self.proxy_port, self.data_port, path))
response = self.req_wrapper(requests.get, 307, url="{protocol}://{ip}:{port}/webhdfs/v1{path}?op=OPEN".format(protocol=self.protocol, ip=self.hdfs_ip, port=self.proxy_port, path=path), headers={'host': str(self.hdfs_ip)}, allow_redirects=False, verify=False, auth=self.kerberos_auth)
logging.debug(
"read_data protocol:{} host:{} ip:{} proxy port:{} data port:{} path: {}".format(
self.protocol,
self.host,
self.hdfs_ip,
self.proxy_port,
self.data_port,
path,
)
)
response = self.req_wrapper(
requests.get,
307,
url="{protocol}://{ip}:{port}/webhdfs/v1{path}?op=OPEN".format(
protocol=self.protocol, ip=self.hdfs_ip, port=self.proxy_port, path=path
),
headers={"host": str(self.hdfs_ip)},
allow_redirects=False,
verify=False,
auth=self.kerberos_auth,
)
# additional_params = '&'.join(response.headers['Location'].split('&')[1:2])
location = None
if self.kerberized:
location = response.headers['Location'].replace("kerberizedhdfs1:1006", "{}:{}".format(self.hdfs_ip, self.data_port))
location = response.headers["Location"].replace(
"kerberizedhdfs1:1006", "{}:{}".format(self.hdfs_ip, self.data_port)
)
else:
location = response.headers['Location'].replace("hdfs1:50075", "{}:{}".format(self.hdfs_ip, self.data_port))
location = response.headers["Location"].replace(
"hdfs1:50075", "{}:{}".format(self.hdfs_ip, self.data_port)
)
logging.debug("redirected to {}".format(location))
response_data = self.req_wrapper(requests.get, 200, url=location, headers={'host': self.hdfs_ip},
verify=False, auth=self.kerberos_auth)
response_data = self.req_wrapper(
requests.get,
200,
url=location,
headers={"host": self.hdfs_ip},
verify=False,
auth=self.kerberos_auth,
)
if universal_newlines:
return response_data.text
@ -126,23 +191,38 @@ class HDFSApi(object):
return response_data.content
def write_data(self, path, content):
logging.debug("write_data protocol:{} host:{} port:{} path: {} user:{}, principal:{}".format(
self.protocol, self.host, self.proxy_port, path, self.user, self.principal))
named_file = NamedTemporaryFile(mode='wb+')
logging.debug(
"write_data protocol:{} host:{} port:{} path: {} user:{}, principal:{}".format(
self.protocol,
self.host,
self.proxy_port,
path,
self.user,
self.principal,
)
)
named_file = NamedTemporaryFile(mode="wb+")
fpath = named_file.name
if isinstance(content, str):
content = content.encode()
named_file.write(content)
named_file.flush()
response = self.req_wrapper(requests.put, 307,
url="{protocol}://{ip}:{port}/webhdfs/v1{path}?op=CREATE".format(protocol=self.protocol, ip=self.hdfs_ip,
port=self.proxy_port,
path=path, user=self.user),
response = self.req_wrapper(
requests.put,
307,
url="{protocol}://{ip}:{port}/webhdfs/v1{path}?op=CREATE".format(
protocol=self.protocol,
ip=self.hdfs_ip,
port=self.proxy_port,
path=path,
user=self.user,
),
allow_redirects=False,
headers={'host': str(self.hdfs_ip)},
params={'overwrite' : 'true'},
verify=False, auth=self.kerberos_auth
headers={"host": str(self.hdfs_ip)},
params={"overwrite": "true"},
verify=False,
auth=self.kerberos_auth,
)
logging.debug("HDFS api response:{}".format(response.headers))
@ -150,23 +230,30 @@ class HDFSApi(object):
# additional_params = '&'.join(
# response.headers['Location'].split('&')[1:2] + ["user.name={}".format(self.user), "overwrite=true"])
if self.kerberized:
location = response.headers['Location'].replace("kerberizedhdfs1:1006", "{}:{}".format(self.hdfs_ip, self.data_port))
location = response.headers["Location"].replace(
"kerberizedhdfs1:1006", "{}:{}".format(self.hdfs_ip, self.data_port)
)
else:
location = response.headers['Location'].replace("hdfs1:50075", "{}:{}".format(self.hdfs_ip, self.data_port))
location = response.headers["Location"].replace(
"hdfs1:50075", "{}:{}".format(self.hdfs_ip, self.data_port)
)
with open(fpath, mode="rb") as fh:
file_data = fh.read()
protocol = "http" # self.protocol
response = self.req_wrapper(requests.put, 201,
protocol = "http" # self.protocol
response = self.req_wrapper(
requests.put,
201,
url="{location}".format(location=location),
data=file_data,
headers={'content-type':'text/plain', 'host': str(self.hdfs_ip)},
params={'file': path, 'user.name' : self.user},
allow_redirects=False, verify=False, auth=self.kerberos_auth
headers={"content-type": "text/plain", "host": str(self.hdfs_ip)},
params={"file": path, "user.name": self.user},
allow_redirects=False,
verify=False,
auth=self.kerberos_auth,
)
logging.debug(f"{response.content} {response.headers}")
def write_gzip_data(self, path, content):
if isinstance(content, str):
content = content.encode()
@ -176,4 +263,10 @@ class HDFSApi(object):
self.write_data(path, out.getvalue())
def read_gzip_data(self, path):
return gzip.GzipFile(fileobj=io.BytesIO(self.read_data(path, universal_newlines=False))).read().decode()
return (
gzip.GzipFile(
fileobj=io.BytesIO(self.read_data(path, universal_newlines=False))
)
.read()
.decode()
)
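As a rough orientation for readers of this helper, a hypothetical round trip through HDFSApi might look like the sketch below. It assumes a non-kerberized HDFS container is already running, as the integration tests arrange; the host name hdfs1 and data port 50075 appear in the hunks above, while the import path, proxy port and IP are assumptions for illustration only.

from helpers.hdfs_api import HDFSApi  # assumed import path

# All connection details below are illustrative assumptions.
api = HDFSApi(
    user="root",
    host="hdfs1",
    proxy_port=50070,
    data_port=50075,
    hdfs_ip="127.0.0.1",
)
api.write_data("/demo.tsv", "1\tone\n2\ttwo\n")
assert api.read_data("/demo.tsv") == "1\tone\n2\ttwo\n"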

View File

@ -9,9 +9,14 @@ from http.server import BaseHTTPRequestHandler, HTTPServer
# Decorator used to check whether authentication works for an external dictionary that uses an HTTP source.
def check_auth(fn):
def wrapper(req):
auth_header = req.headers.get('authorization', None)
api_key = req.headers.get('api-key', None)
if not auth_header or auth_header != 'Basic Zm9vOmJhcg==' or not api_key or api_key != 'secret':
auth_header = req.headers.get("authorization", None)
api_key = req.headers.get("api-key", None)
if (
not auth_header
or auth_header != "Basic Zm9vOmJhcg=="
or not api_key
or api_key != "secret"
):
req.send_response(401)
else:
fn(req)
@ -35,15 +40,15 @@ def start_server(server_address, data_path, schema, cert_path, address_family):
def __send_headers(self):
self.send_response(200)
self.send_header('Content-type', 'text/tsv')
self.send_header("Content-type", "text/tsv")
self.end_headers()
def __send_data(self, only_ids=None):
with open(data_path, 'r') as fl:
reader = csv.reader(fl, delimiter='\t')
with open(data_path, "r") as fl:
reader = csv.reader(fl, delimiter="\t")
for row in reader:
if not only_ids or (row[0] in only_ids):
self.wfile.write(('\t'.join(row) + '\n').encode())
self.wfile.write(("\t".join(row) + "\n").encode())
def __read_and_decode_post_ids(self):
data = self.__read_and_decode_post_data()
@ -51,7 +56,7 @@ def start_server(server_address, data_path, schema, cert_path, address_family):
def __read_and_decode_post_data(self):
transfer_encoding = self.headers.get("Transfer-encoding")
decoded = "";
decoded = ""
if transfer_encoding == "chunked":
while True:
s = self.rfile.readline().decode()
@ -69,19 +74,29 @@ def start_server(server_address, data_path, schema, cert_path, address_family):
HTTPServer.address_family = socket.AF_INET6
httpd = HTTPServer(server_address, TSVHTTPHandler)
if schema == "https":
httpd.socket = ssl.wrap_socket(httpd.socket, certfile=cert_path, server_side=True)
httpd.socket = ssl.wrap_socket(
httpd.socket, certfile=cert_path, server_side=True
)
httpd.serve_forever()
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Simple HTTP server returns data from file")
parser = argparse.ArgumentParser(
description="Simple HTTP server returns data from file"
)
parser.add_argument("--host", default="localhost")
parser.add_argument("--port", default=5555, type=int)
parser.add_argument("--data-path", required=True)
parser.add_argument("--schema", choices=("http", "https"), required=True)
parser.add_argument("--cert-path", default="./fake_cert.pem")
parser.add_argument('--address-family', choices=("ipv4", "ipv6"), default="ipv4")
parser.add_argument("--address-family", choices=("ipv4", "ipv6"), default="ipv4")
args = parser.parse_args()
start_server((args.host, args.port), args.data_path, args.schema, args.cert_path, args.address_family)
start_server(
(args.host, args.port),
args.data_path,
args.schema,
args.cert_path,
args.address_family,
)
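For reference, SourceHTTPBase.prepare() above starts this script inside the container with the same arguments; a hypothetical local launch over plain HTTP is sketched below. The data path is an assumption used only for illustration.

import subprocess

# Mirrors the command used in SourceHTTPBase.prepare(), but run locally.
subprocess.Popen(
    [
        "python3",
        "http_server.py",
        "--data-path=/tmp/demo.tsv",  # assumed path
        "--schema=http",
        "--host=localhost",
        "--port=5555",
    ]
)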

View File

@ -22,26 +22,38 @@ class PartitionManager:
self._netem_delayed_instances = []
_NetworkManager.get()
def drop_instance_zk_connections(self, instance, action='DROP'):
def drop_instance_zk_connections(self, instance, action="DROP"):
self._check_instance(instance)
self._add_rule({'source': instance.ip_address, 'destination_port': 2181, 'action': action})
self._add_rule({'destination': instance.ip_address, 'source_port': 2181, 'action': action})
self._add_rule(
{"source": instance.ip_address, "destination_port": 2181, "action": action}
)
self._add_rule(
{"destination": instance.ip_address, "source_port": 2181, "action": action}
)
def restore_instance_zk_connections(self, instance, action='DROP'):
def restore_instance_zk_connections(self, instance, action="DROP"):
self._check_instance(instance)
self._delete_rule({'source': instance.ip_address, 'destination_port': 2181, 'action': action})
self._delete_rule({'destination': instance.ip_address, 'source_port': 2181, 'action': action})
self._delete_rule(
{"source": instance.ip_address, "destination_port": 2181, "action": action}
)
self._delete_rule(
{"destination": instance.ip_address, "source_port": 2181, "action": action}
)
def partition_instances(self, left, right, port=None, action='DROP'):
def partition_instances(self, left, right, port=None, action="DROP"):
self._check_instance(left)
self._check_instance(right)
def create_rule(src, dst):
rule = {'source': src.ip_address, 'destination': dst.ip_address, 'action': action}
rule = {
"source": src.ip_address,
"destination": dst.ip_address,
"action": action,
}
if port is not None:
rule['destination_port'] = port
rule["destination_port"] = port
return rule
self._add_rule(create_rule(left, right))
@ -57,7 +69,9 @@ class PartitionManager:
while self._netem_delayed_instances:
instance = self._netem_delayed_instances.pop()
instance.exec_in_container(["bash", "-c", "tc qdisc del dev eth0 root netem"], user="root")
instance.exec_in_container(
["bash", "-c", "tc qdisc del dev eth0 root netem"], user="root"
)
def pop_rules(self):
res = self._iptables_rules[:]
@ -71,7 +85,7 @@ class PartitionManager:
@staticmethod
def _check_instance(instance):
if instance.ip_address is None:
raise Exception('Instance + ' + instance.name + ' is not launched!')
raise Exception("Instance + " + instance.name + " is not launched!")
def _add_rule(self, rule):
_NetworkManager.get().add_iptables_rule(**rule)
@ -82,7 +96,14 @@ class PartitionManager:
self._iptables_rules.remove(rule)
def _add_tc_netem_delay(self, instance, delay_ms):
instance.exec_in_container(["bash", "-c", "tc qdisc add dev eth0 root netem delay {}ms".format(delay_ms)], user="root")
instance.exec_in_container(
[
"bash",
"-c",
"tc qdisc add dev eth0 root netem delay {}ms".format(delay_ms),
],
user="root",
)
self._netem_delayed_instances.append(instance)
def __enter__(self):
@ -127,12 +148,12 @@ class _NetworkManager:
return cls._instance
def add_iptables_rule(self, **kwargs):
cmd = ['iptables', '--wait', '-I', 'DOCKER-USER', '1']
cmd = ["iptables", "--wait", "-I", "DOCKER-USER", "1"]
cmd.extend(self._iptables_cmd_suffix(**kwargs))
self._exec_run(cmd, privileged=True)
def delete_iptables_rule(self, **kwargs):
cmd = ['iptables', '--wait', '-D', 'DOCKER-USER']
cmd = ["iptables", "--wait", "-D", "DOCKER-USER"]
cmd.extend(self._iptables_cmd_suffix(**kwargs))
self._exec_run(cmd, privileged=True)
@ -144,40 +165,66 @@ class _NetworkManager:
res = subprocess.run("iptables --wait -D DOCKER-USER 1", shell=True)
if res.returncode != 0:
logging.info("All iptables rules cleared, " + str(iptables_iter) + " iterations, last error: " + str(res.stderr))
logging.info(
"All iptables rules cleared, "
+ str(iptables_iter)
+ " iterations, last error: "
+ str(res.stderr)
)
return
@staticmethod
def _iptables_cmd_suffix(
source=None, destination=None,
source_port=None, destination_port=None,
action=None, probability=None, custom_args=None):
source=None,
destination=None,
source_port=None,
destination_port=None,
action=None,
probability=None,
custom_args=None,
):
ret = []
if probability is not None:
ret.extend(['-m', 'statistic', '--mode', 'random', '--probability', str(probability)])
ret.extend(['-p', 'tcp'])
ret.extend(
[
"-m",
"statistic",
"--mode",
"random",
"--probability",
str(probability),
]
)
ret.extend(["-p", "tcp"])
if source is not None:
ret.extend(['-s', source])
ret.extend(["-s", source])
if destination is not None:
ret.extend(['-d', destination])
ret.extend(["-d", destination])
if source_port is not None:
ret.extend(['--sport', str(source_port)])
ret.extend(["--sport", str(source_port)])
if destination_port is not None:
ret.extend(['--dport', str(destination_port)])
ret.extend(["--dport", str(destination_port)])
if action is not None:
ret.extend(['-j'] + action.split())
ret.extend(["-j"] + action.split())
if custom_args is not None:
ret.extend(custom_args)
return ret
def __init__(
self,
container_expire_timeout=50, container_exit_timeout=60, docker_api_version=os.environ.get("DOCKER_API_VERSION")):
self,
container_expire_timeout=50,
container_exit_timeout=60,
docker_api_version=os.environ.get("DOCKER_API_VERSION"),
):
self.container_expire_timeout = container_expire_timeout
self.container_exit_timeout = container_exit_timeout
self._docker_client = docker.DockerClient(base_url='unix:///var/run/docker.sock', version=docker_api_version, timeout=600)
self._docker_client = docker.DockerClient(
base_url="unix:///var/run/docker.sock",
version=docker_api_version,
timeout=600,
)
self._container = None
@ -194,29 +241,41 @@ class _NetworkManager:
except docker.errors.NotFound:
break
except Exception as ex:
                        print("Error removing network blockade container, will try again", str(ex))
                        print(
                            "Error removing network blockade container, will try again",
                            str(ex),
                        )
time.sleep(i)
image = subprocess.check_output("docker images -q clickhouse/integration-helper 2>/dev/null", shell=True)
image = subprocess.check_output(
"docker images -q clickhouse/integration-helper 2>/dev/null", shell=True
)
if not image.strip():
print("No network image helper, will try download")
# for some reason docker api may hang if image doesn't exist, so we download it
# before running
for i in range(5):
try:
subprocess.check_call("docker pull clickhouse/integration-helper", shell=True) # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL
subprocess.check_call( # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL
"docker pull clickhouse/integration-helper", shell=True
)
break
except:
time.sleep(i)
else:
raise Exception("Cannot pull clickhouse/integration-helper image")
self._container = self._docker_client.containers.run('clickhouse/integration-helper',
auto_remove=True,
command=('sleep %s' % self.container_exit_timeout),
# /run/xtables.lock passed inside for correct iptables --wait
volumes={'/run/xtables.lock': {'bind': '/run/xtables.lock', 'mode': 'ro' }},
detach=True, network_mode='host')
self._container = self._docker_client.containers.run(
"clickhouse/integration-helper",
auto_remove=True,
command=("sleep %s" % self.container_exit_timeout),
# /run/xtables.lock passed inside for correct iptables --wait
volumes={
"/run/xtables.lock": {"bind": "/run/xtables.lock", "mode": "ro"}
},
detach=True,
network_mode="host",
)
container_id = self._container.id
self._container_expire_time = time.time() + self.container_expire_timeout
@ -233,8 +292,8 @@ class _NetworkManager:
container = self._ensure_container()
handle = self._docker_client.api.exec_create(container.id, cmd, **kwargs)
output = self._docker_client.api.exec_start(handle).decode('utf8')
exit_code = self._docker_client.api.exec_inspect(handle)['ExitCode']
output = self._docker_client.api.exec_start(handle).decode("utf8")
exit_code = self._docker_client.api.exec_inspect(handle)["ExitCode"]
if exit_code != 0:
print(output)
@ -242,30 +301,56 @@ class _NetworkManager:
return output
# Approximately measure network I/O speed for an interface
class NetThroughput(object):
def __init__(self, node):
self.node = node
# trying to get default interface and check it in /proc/net/dev
self.interface = self.node.exec_in_container(["bash", "-c", "awk '{print $1 \" \" $2}' /proc/net/route | grep 00000000 | awk '{print $1}'"]).strip()
check = self.node.exec_in_container(["bash", "-c", f'grep "^ *{self.interface}:" /proc/net/dev']).strip()
if not check: # if check is not successful just try eth{1-10}
self.interface = self.node.exec_in_container(
[
"bash",
"-c",
"awk '{print $1 \" \" $2}' /proc/net/route | grep 00000000 | awk '{print $1}'",
]
).strip()
check = self.node.exec_in_container(
["bash", "-c", f'grep "^ *{self.interface}:" /proc/net/dev']
).strip()
if not check: # if check is not successful just try eth{1-10}
for i in range(10):
try:
self.interface = self.node.exec_in_container(["bash", "-c", f"awk '{{print $1}}' /proc/net/route | grep 'eth{i}'"]).strip()
self.interface = self.node.exec_in_container(
[
"bash",
"-c",
f"awk '{{print $1}}' /proc/net/route | grep 'eth{i}'",
]
).strip()
break
except Exception as ex:
print(f"No interface eth{i}")
else:
raise Exception("No interface eth{1-10} and default interface not specified in /proc/net/route, maybe some special network configuration")
raise Exception(
"No interface eth{1-10} and default interface not specified in /proc/net/route, maybe some special network configuration"
)
try:
check = self.node.exec_in_container(["bash", "-c", f'grep "^ *{self.interface}:" /proc/net/dev']).strip()
check = self.node.exec_in_container(
["bash", "-c", f'grep "^ *{self.interface}:" /proc/net/dev']
).strip()
if not check:
raise Exception(f"No such interface {self.interface} found in /proc/net/dev")
raise Exception(
f"No such interface {self.interface} found in /proc/net/dev"
)
except:
logging.error("All available interfaces %s", self.node.exec_in_container(["bash", "-c", "cat /proc/net/dev"]))
raise Exception(f"No such interface {self.interface} found in /proc/net/dev")
logging.error(
"All available interfaces %s",
self.node.exec_in_container(["bash", "-c", "cat /proc/net/dev"]),
)
raise Exception(
f"No such interface {self.interface} found in /proc/net/dev"
)
self.current_in = self._get_in_bytes()
self.current_out = self._get_out_bytes()
@ -273,27 +358,47 @@ class NetThroughput(object):
def _get_in_bytes(self):
try:
result = self.node.exec_in_container(['bash', '-c', f'awk "/^ *{self.interface}:/"\' {{ if ($1 ~ /.*:[0-9][0-9]*/) {{ sub(/^.*:/, "") ; print $1 }} else {{ print $2 }} }}\' /proc/net/dev'])
result = self.node.exec_in_container(
[
"bash",
"-c",
f'awk "/^ *{self.interface}:/"\' {{ if ($1 ~ /.*:[0-9][0-9]*/) {{ sub(/^.*:/, "") ; print $1 }} else {{ print $2 }} }}\' /proc/net/dev',
]
)
except:
raise Exception(f"Cannot receive in bytes from /proc/net/dev for interface {self.interface}")
raise Exception(
f"Cannot receive in bytes from /proc/net/dev for interface {self.interface}"
)
try:
return int(result)
except:
raise Exception(f"Got non-numeric in bytes '{result}' from /proc/net/dev for interface {self.interface}")
raise Exception(
f"Got non-numeric in bytes '{result}' from /proc/net/dev for interface {self.interface}"
)
def _get_out_bytes(self):
try:
result = self.node.exec_in_container(['bash', '-c', f'awk "/^ *{self.interface}:/"\' {{ if ($1 ~ /.*:[0-9][0-9]*/) {{ print $9 }} else {{ print $10 }} }}\' /proc/net/dev'])
result = self.node.exec_in_container(
[
"bash",
"-c",
f"awk \"/^ *{self.interface}:/\"' {{ if ($1 ~ /.*:[0-9][0-9]*/) {{ print $9 }} else {{ print $10 }} }}' /proc/net/dev",
]
)
except:
raise Exception(f"Cannot receive out bytes from /proc/net/dev for interface {self.interface}")
raise Exception(
f"Cannot receive out bytes from /proc/net/dev for interface {self.interface}"
)
try:
return int(result)
except:
raise Exception(f"Got non-numeric out bytes '{result}' from /proc/net/dev for interface {self.interface}")
raise Exception(
f"Got non-numeric out bytes '{result}' from /proc/net/dev for interface {self.interface}"
)
def measure_speed(self, measure='bytes'):
def measure_speed(self, measure="bytes"):
new_in = self._get_in_bytes()
new_out = self._get_out_bytes()
current_time = time.time()
@ -304,11 +409,11 @@ class NetThroughput(object):
self.current_in = new_in
self.measure_time = current_time
if measure == 'bytes':
if measure == "bytes":
return in_speed, out_speed
elif measure == 'kilobytes':
return in_speed / 1024., out_speed / 1024.
elif measure == 'megabytes':
elif measure == "kilobytes":
return in_speed / 1024.0, out_speed / 1024.0
elif measure == "megabytes":
return in_speed / (1024 * 1024), out_speed / (1024 * 1024)
else:
raise Exception(f"Unknown measure {measure}")
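A hypothetical use of the two helpers above from a test body is sketched below; node1 and node2 stand for already-started ClickHouseInstance objects, the import path and port 9000 are assumptions, and the clean-up-on-exit behaviour of the context manager is inferred rather than shown in these hunks.

from helpers.network import PartitionManager, NetThroughput  # assumed import path


def run_partitioned_step(node1, node2):
    """Hypothetical test step: node1/node2 are started ClickHouseInstance objects."""
    throughput = NetThroughput(node1)
    with PartitionManager() as pm:
        # Drop TCP traffic between the two nodes on the native protocol port (assumed 9000).
        pm.partition_instances(node1, node2, port=9000, action="DROP")
        # ... queries issued here should observe the partition ...
        in_kb, out_kb = throughput.measure_speed(measure="kilobytes")
    # Leaving the `with` block is expected to remove the injected iptables rules.
    return in_kb, out_kb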

View File

@ -23,11 +23,21 @@ postgres_table_template_5 = """
key Integer NOT NULL, value UUID, PRIMARY KEY(key))
"""
def get_postgres_conn(ip, port, database=False, auto_commit=True, database_name='postgres_database', replication=False):
def get_postgres_conn(
ip,
port,
database=False,
auto_commit=True,
database_name="postgres_database",
replication=False,
):
if database == True:
conn_string = f"host={ip} port={port} dbname='{database_name}' user='postgres' password='mysecretpassword'"
else:
conn_string = f"host={ip} port={port} user='postgres' password='mysecretpassword'"
conn_string = (
f"host={ip} port={port} user='postgres' password='mysecretpassword'"
)
if replication:
conn_string += " replication='database'"
@ -38,33 +48,41 @@ def get_postgres_conn(ip, port, database=False, auto_commit=True, database_name=
conn.autocommit = True
return conn
def create_replication_slot(conn, slot_name='user_slot'):
def create_replication_slot(conn, slot_name="user_slot"):
cursor = conn.cursor()
cursor.execute(f'CREATE_REPLICATION_SLOT {slot_name} LOGICAL pgoutput EXPORT_SNAPSHOT')
cursor.execute(
f"CREATE_REPLICATION_SLOT {slot_name} LOGICAL pgoutput EXPORT_SNAPSHOT"
)
result = cursor.fetchall()
print(result[0][0]) # slot name
print(result[0][1]) # start lsn
print(result[0][2]) # snapshot
print(result[0][0]) # slot name
print(result[0][1]) # start lsn
print(result[0][2]) # snapshot
return result[0][2]
def drop_replication_slot(conn, slot_name='user_slot'):
def drop_replication_slot(conn, slot_name="user_slot"):
cursor = conn.cursor()
cursor.execute(f"select pg_drop_replication_slot('{slot_name}')")
def create_postgres_schema(cursor, schema_name):
drop_postgres_schema(cursor, schema_name)
cursor.execute(f'CREATE SCHEMA {schema_name}')
cursor.execute(f"CREATE SCHEMA {schema_name}")
def drop_postgres_schema(cursor, schema_name):
cursor.execute(f'DROP SCHEMA IF EXISTS {schema_name} CASCADE')
cursor.execute(f"DROP SCHEMA IF EXISTS {schema_name} CASCADE")
def create_postgres_table(cursor, table_name, replica_identity_full=False, template=postgres_table_template):
def create_postgres_table(
cursor, table_name, replica_identity_full=False, template=postgres_table_template
):
drop_postgres_table(cursor, table_name)
cursor.execute(template.format(table_name))
if replica_identity_full:
cursor.execute(f'ALTER TABLE {table_name} REPLICA IDENTITY FULL;')
cursor.execute(f"ALTER TABLE {table_name} REPLICA IDENTITY FULL;")
def drop_postgres_table(cursor, table_name):
cursor.execute(f"""DROP TABLE IF EXISTS "{table_name}" """)
@ -74,6 +92,7 @@ def create_postgres_table_with_schema(cursor, schema_name, table_name):
drop_postgres_table_with_schema(cursor, schema_name, table_name)
cursor.execute(postgres_table_template_4.format(schema_name, table_name))
def drop_postgres_table_with_schema(cursor, schema_name, table_name):
cursor.execute(f"""DROP TABLE IF EXISTS "{schema_name}"."{table_name}" """)
@ -102,14 +121,14 @@ class PostgresManager:
def prepare(self):
conn = get_postgres_conn(ip=self.ip, port=self.port)
cursor = conn.cursor()
self.create_postgres_db(cursor, 'postgres_database')
self.create_postgres_db(cursor, "postgres_database")
self.create_clickhouse_postgres_db(ip=self.ip, port=self.port)
def clear(self):
if self.conn.closed == 0:
self.conn.close()
for db in self.created_materialized_postgres_db_list.copy():
self.drop_materialized_db(db);
self.drop_materialized_db(db)
for db in self.created_ch_postgres_db_list.copy():
self.drop_clickhouse_postgres_db(db)
if len(self.created_postgres_db_list) > 0:
@ -122,38 +141,54 @@ class PostgresManager:
self.conn = get_postgres_conn(ip=self.ip, port=self.port, database=True)
return self.conn.cursor()
def create_postgres_db(self, cursor, name='postgres_database'):
def create_postgres_db(self, cursor, name="postgres_database"):
self.drop_postgres_db(cursor, name)
self.created_postgres_db_list.add(name)
cursor.execute(f"CREATE DATABASE {name}")
def drop_postgres_db(self, cursor, name='postgres_database'):
def drop_postgres_db(self, cursor, name="postgres_database"):
cursor.execute(f"DROP DATABASE IF EXISTS {name}")
if name in self.created_postgres_db_list:
self.created_postgres_db_list.remove(name)
def create_clickhouse_postgres_db(self, ip, port, name='postgres_database', database_name='postgres_database', schema_name=''):
def create_clickhouse_postgres_db(
self,
ip,
port,
name="postgres_database",
database_name="postgres_database",
schema_name="",
):
self.drop_clickhouse_postgres_db(name)
self.created_ch_postgres_db_list.add(name)
if len(schema_name) == 0:
self.instance.query(f'''
self.instance.query(
f"""
CREATE DATABASE {name}
ENGINE = PostgreSQL('{ip}:{port}', '{database_name}', 'postgres', 'mysecretpassword')''')
ENGINE = PostgreSQL('{ip}:{port}', '{database_name}', 'postgres', 'mysecretpassword')"""
)
else:
self.instance.query(f'''
self.instance.query(
f"""
CREATE DATABASE {name}
ENGINE = PostgreSQL('{ip}:{port}', '{database_name}', 'postgres', 'mysecretpassword', '{schema_name}')''')
ENGINE = PostgreSQL('{ip}:{port}', '{database_name}', 'postgres', 'mysecretpassword', '{schema_name}')"""
)
def drop_clickhouse_postgres_db(self, name='postgres_database'):
self.instance.query(f'DROP DATABASE IF EXISTS {name}')
def drop_clickhouse_postgres_db(self, name="postgres_database"):
self.instance.query(f"DROP DATABASE IF EXISTS {name}")
if name in self.created_ch_postgres_db_list:
self.created_ch_postgres_db_list.remove(name)
def create_materialized_db(self, ip, port,
materialized_database='test_database', postgres_database='postgres_database',
settings=[], table_overrides=''):
def create_materialized_db(
self,
ip,
port,
materialized_database="test_database",
postgres_database="postgres_database",
settings=[],
table_overrides="",
):
self.created_materialized_postgres_db_list.add(materialized_database)
self.instance.query(f"DROP DATABASE IF EXISTS {materialized_database}")
@ -162,17 +197,17 @@ class PostgresManager:
create_query += " SETTINGS "
for i in range(len(settings)):
if i != 0:
create_query += ', '
create_query += ", "
create_query += settings[i]
create_query += table_overrides
self.instance.query(create_query)
assert materialized_database in self.instance.query('SHOW DATABASES')
assert materialized_database in self.instance.query("SHOW DATABASES")
def drop_materialized_db(self, materialized_database='test_database'):
self.instance.query(f'DROP DATABASE IF EXISTS {materialized_database} NO DELAY')
def drop_materialized_db(self, materialized_database="test_database"):
self.instance.query(f"DROP DATABASE IF EXISTS {materialized_database} NO DELAY")
if materialized_database in self.created_materialized_postgres_db_list:
self.created_materialized_postgres_db_list.remove(materialized_database)
assert materialized_database not in self.instance.query('SHOW DATABASES')
assert materialized_database not in self.instance.query("SHOW DATABASES")
def create_and_fill_postgres_table(self, table_name):
conn = get_postgres_conn(ip=self.ip, port=self.port, database=True)
@ -180,82 +215,109 @@ class PostgresManager:
self.create_and_fill_postgres_table_from_cursor(cursor, table_name)
def create_and_fill_postgres_table_from_cursor(self, cursor, table_name):
create_postgres_table(cursor, table_name);
self.instance.query(f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers(50)")
create_postgres_table(cursor, table_name)
self.instance.query(
f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers(50)"
)
def create_and_fill_postgres_tables(self, tables_num, numbers=50):
conn = get_postgres_conn(ip=self.ip, port=self.port, database=True)
cursor = conn.cursor()
self.create_and_fill_postgres_tables_from_cursor(cursor, tables_num, numbers=numbers)
self.create_and_fill_postgres_tables_from_cursor(
cursor, tables_num, numbers=numbers
)
def create_and_fill_postgres_tables_from_cursor(self, cursor, tables_num, numbers=50):
def create_and_fill_postgres_tables_from_cursor(
self, cursor, tables_num, numbers=50
):
for i in range(tables_num):
table_name = f'postgresql_replica_{i}'
create_postgres_table(cursor, table_name);
table_name = f"postgresql_replica_{i}"
create_postgres_table(cursor, table_name)
if numbers > 0:
self.instance.query(f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers({numbers})")
self.instance.query(
f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers({numbers})"
)
queries = [
'INSERT INTO postgresql_replica_{} select i, i from generate_series(0, 10000) as t(i);',
'DELETE FROM postgresql_replica_{} WHERE (value*value) % 3 = 0;',
'UPDATE postgresql_replica_{} SET value = value - 125 WHERE key % 2 = 0;',
"INSERT INTO postgresql_replica_{} select i, i from generate_series(0, 10000) as t(i);",
"DELETE FROM postgresql_replica_{} WHERE (value*value) % 3 = 0;",
"UPDATE postgresql_replica_{} SET value = value - 125 WHERE key % 2 = 0;",
"UPDATE postgresql_replica_{} SET key=key+20000 WHERE key%2=0",
'INSERT INTO postgresql_replica_{} select i, i from generate_series(40000, 50000) as t(i);',
'DELETE FROM postgresql_replica_{} WHERE key % 10 = 0;',
'UPDATE postgresql_replica_{} SET value = value + 101 WHERE key % 2 = 1;',
"INSERT INTO postgresql_replica_{} select i, i from generate_series(40000, 50000) as t(i);",
"DELETE FROM postgresql_replica_{} WHERE key % 10 = 0;",
"UPDATE postgresql_replica_{} SET value = value + 101 WHERE key % 2 = 1;",
"UPDATE postgresql_replica_{} SET key=key+80000 WHERE key%2=1",
'DELETE FROM postgresql_replica_{} WHERE value % 2 = 0;',
'UPDATE postgresql_replica_{} SET value = value + 2000 WHERE key % 5 = 0;',
'INSERT INTO postgresql_replica_{} select i, i from generate_series(200000, 250000) as t(i);',
'DELETE FROM postgresql_replica_{} WHERE value % 3 = 0;',
'UPDATE postgresql_replica_{} SET value = value * 2 WHERE key % 3 = 0;',
"DELETE FROM postgresql_replica_{} WHERE value % 2 = 0;",
"UPDATE postgresql_replica_{} SET value = value + 2000 WHERE key % 5 = 0;",
"INSERT INTO postgresql_replica_{} select i, i from generate_series(200000, 250000) as t(i);",
"DELETE FROM postgresql_replica_{} WHERE value % 3 = 0;",
"UPDATE postgresql_replica_{} SET value = value * 2 WHERE key % 3 = 0;",
"UPDATE postgresql_replica_{} SET key=key+500000 WHERE key%2=1",
'INSERT INTO postgresql_replica_{} select i, i from generate_series(1000000, 1050000) as t(i);',
'DELETE FROM postgresql_replica_{} WHERE value % 9 = 2;',
"INSERT INTO postgresql_replica_{} select i, i from generate_series(1000000, 1050000) as t(i);",
"DELETE FROM postgresql_replica_{} WHERE value % 9 = 2;",
"UPDATE postgresql_replica_{} SET key=key+10000000",
'UPDATE postgresql_replica_{} SET value = value + 2 WHERE key % 3 = 1;',
'DELETE FROM postgresql_replica_{} WHERE value%5 = 0;'
]
"UPDATE postgresql_replica_{} SET value = value + 2 WHERE key % 3 = 1;",
"DELETE FROM postgresql_replica_{} WHERE value%5 = 0;",
]
def assert_nested_table_is_created(instance, table_name, materialized_database='test_database', schema_name=''):
def assert_nested_table_is_created(
instance, table_name, materialized_database="test_database", schema_name=""
):
if len(schema_name) == 0:
table = table_name
else:
table = schema_name + "." + table_name
print(f'Checking table {table} exists in {materialized_database}')
database_tables = instance.query(f'SHOW TABLES FROM {materialized_database}')
print(f"Checking table {table} exists in {materialized_database}")
database_tables = instance.query(f"SHOW TABLES FROM {materialized_database}")
while table not in database_tables:
time.sleep(0.2)
database_tables = instance.query(f'SHOW TABLES FROM {materialized_database}')
database_tables = instance.query(f"SHOW TABLES FROM {materialized_database}")
assert(table in database_tables)
assert table in database_tables
def assert_number_of_columns(instance, expected, table_name, database_name='test_database'):
result = instance.query(f"select count() from system.columns where table = '{table_name}' and database = '{database_name}' and not startsWith(name, '_')")
while (int(result) != expected):
def assert_number_of_columns(
instance, expected, table_name, database_name="test_database"
):
result = instance.query(
f"select count() from system.columns where table = '{table_name}' and database = '{database_name}' and not startsWith(name, '_')"
)
while int(result) != expected:
time.sleep(1)
result = instance.query(f"select count() from system.columns where table = '{table_name}' and database = '{database_name}' and not startsWith(name, '_')")
print('Number of columns ok')
result = instance.query(
f"select count() from system.columns where table = '{table_name}' and database = '{database_name}' and not startsWith(name, '_')"
)
print("Number of columns ok")
def check_tables_are_synchronized(instance, table_name, order_by='key', postgres_database='postgres_database', materialized_database='test_database', schema_name=''):
assert_nested_table_is_created(instance, table_name, materialized_database, schema_name)
def check_tables_are_synchronized(
instance,
table_name,
order_by="key",
postgres_database="postgres_database",
materialized_database="test_database",
schema_name="",
):
assert_nested_table_is_created(
instance, table_name, materialized_database, schema_name
)
table_path = ''
table_path = ""
if len(schema_name) == 0:
table_path = f'{materialized_database}.{table_name}'
table_path = f"{materialized_database}.{table_name}"
else:
table_path = f'{materialized_database}.`{schema_name}.{table_name}`'
table_path = f"{materialized_database}.`{schema_name}.{table_name}`"
print(f"Checking table is synchronized: {table_path}")
result_query = f'select * from {table_path} order by {order_by};'
result_query = f"select * from {table_path} order by {order_by};"
expected = instance.query(f'select * from {postgres_database}.{table_name} order by {order_by};')
expected = instance.query(
f"select * from {postgres_database}.{table_name} order by {order_by};"
)
result = instance.query(result_query)
for _ in range(30):
@ -265,9 +327,16 @@ def check_tables_are_synchronized(instance, table_name, order_by='key', postgres
time.sleep(0.5)
result = instance.query(result_query)
assert(result == expected)
assert result == expected
def check_several_tables_are_synchronized(instance, tables_num, order_by='key', postgres_database='postgres_database', materialized_database='test_database', schema_name=''):
def check_several_tables_are_synchronized(
instance,
tables_num,
order_by="key",
postgres_database="postgres_database",
materialized_database="test_database",
schema_name="",
):
for i in range(tables_num):
check_tables_are_synchronized(instance, f'postgresql_replica_{i}');
check_tables_are_synchronized(instance, f"postgresql_replica_{i}")
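A hypothetical test step built on the helpers above is sketched below. It assumes cluster and instance come from the usual integration-test fixtures, that postgres_database already exists on the ClickHouse side and a materialized database named test_database has been created, and that the import path and the cluster attributes are as guessed here.

from helpers.postgres_utility import (  # assumed import path
    get_postgres_conn,
    create_postgres_table,
    check_tables_are_synchronized,
)


def fill_and_check(cluster, instance):
    # `cluster.postgres_ip` / `cluster.postgres_port` are assumed fixture attributes.
    conn = get_postgres_conn(
        ip=cluster.postgres_ip, port=cluster.postgres_port, database=True
    )
    cursor = conn.cursor()
    create_postgres_table(cursor, "postgresql_replica_0")
    instance.query(
        "INSERT INTO postgres_database.postgresql_replica_0 "
        "SELECT number, number FROM numbers(50)"
    )
    check_tables_are_synchronized(instance, "postgresql_replica_0")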

View File

@ -5,17 +5,17 @@ import os.path
# Without this function, all workers will log to the same log file
# and mix everything together, making troubleshooting much more difficult.
def setup():
worker_name = os.environ.get('PYTEST_XDIST_WORKER', 'master')
if worker_name == 'master':
worker_name = os.environ.get("PYTEST_XDIST_WORKER", "master")
if worker_name == "master":
return
logger = logging.getLogger('')
logger = logging.getLogger("")
new_handlers = []
handlers_to_remove = []
for handler in logger.handlers:
if isinstance(handler, logging.FileHandler):
filename, ext = os.path.splitext(handler.baseFilename)
if not filename.endswith('-' + worker_name):
new_filename = filename + '-' + worker_name
if not filename.endswith("-" + worker_name):
new_filename = filename + "-" + worker_name
new_handler = logging.FileHandler(new_filename + ext)
new_handler.setFormatter(handler.formatter)
new_handler.setLevel(handler.level)
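# Hypothetical usage sketch (not part of the file above): pytest-xdist exposes the
# worker name through the PYTEST_XDIST_WORKER environment variable that setup()
# reads, so each worker ends up with its own "<basename>-<worker>.log" file.
# The base filename below is illustrative.
import os

def per_worker_filename(base="integration.log"):
    worker = os.environ.get("PYTEST_XDIST_WORKER", "master")
    if worker == "master":
        return base
    name, ext = os.path.splitext(base)
    return f"{name}-{worker}{ext}"

# per_worker_filename() -> "integration.log" on the master process,
# and "integration-gw0.log" on xdist worker "gw0".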

View File

@ -13,12 +13,18 @@ class TSV:
elif isinstance(contents, str) or isinstance(contents, str):
raw_lines = contents.splitlines(True)
elif isinstance(contents, list):
raw_lines = ['\t'.join(map(str, l)) if isinstance(l, list) else str(l) for l in contents]
raw_lines = [
"\t".join(map(str, l)) if isinstance(l, list) else str(l)
for l in contents
]
elif isinstance(contents, TSV):
self.lines = contents.lines
return
else:
raise TypeError("contents must be either file or string or list, actual type: " + type(contents).__name__)
raise TypeError(
"contents must be either file or string or list, actual type: "
+ type(contents).__name__
)
self.lines = [l.strip() for l in raw_lines if l.strip()]
def __eq__(self, other):
@ -31,13 +37,18 @@ class TSV:
return self != TSV(other)
return self.lines != other.lines
def diff(self, other, n1='', n2=''):
def diff(self, other, n1="", n2=""):
if not isinstance(other, TSV):
return self.diff(TSV(other), n1=n1, n2=n2)
return list(line.rstrip() for line in difflib.unified_diff(self.lines, other.lines, fromfile=n1, tofile=n2))[2:]
return list(
line.rstrip()
for line in difflib.unified_diff(
self.lines, other.lines, fromfile=n1, tofile=n2
)
)[2:]
def __str__(self):
return '\n'.join(self.lines)
return "\n".join(self.lines)
def __repr__(self):
return self.__str__()
@ -50,29 +61,70 @@ class TSV:
return [line.split("\t") for line in contents.split("\n") if line.strip()]
def assert_eq_with_retry(instance, query, expectation, retry_count=20, sleep_time=0.5, stdin=None, timeout=None,
settings=None, user=None, ignore_error=False, get_result=lambda x: x):
def assert_eq_with_retry(
instance,
query,
expectation,
retry_count=20,
sleep_time=0.5,
stdin=None,
timeout=None,
settings=None,
user=None,
ignore_error=False,
get_result=lambda x: x,
):
expectation_tsv = TSV(expectation)
for i in range(retry_count):
try:
if TSV(get_result(instance.query(query, user=user, stdin=stdin, timeout=timeout, settings=settings,
ignore_error=ignore_error))) == expectation_tsv:
if (
TSV(
get_result(
instance.query(
query,
user=user,
stdin=stdin,
timeout=timeout,
settings=settings,
ignore_error=ignore_error,
)
)
)
== expectation_tsv
):
break
time.sleep(sleep_time)
except Exception as ex:
logging.exception(f"assert_eq_with_retry retry {i+1} exception {ex}")
time.sleep(sleep_time)
else:
val = TSV(get_result(instance.query(query, user=user, stdin=stdin, timeout=timeout, settings=settings,
ignore_error=ignore_error)))
val = TSV(
get_result(
instance.query(
query,
user=user,
stdin=stdin,
timeout=timeout,
settings=settings,
ignore_error=ignore_error,
)
)
)
if expectation_tsv != val:
raise AssertionError("'{}' != '{}'\n{}".format(expectation_tsv, val, '\n'.join(
expectation_tsv.diff(val, n1="expectation", n2="query"))))
raise AssertionError(
"'{}' != '{}'\n{}".format(
expectation_tsv,
val,
"\n".join(expectation_tsv.diff(val, n1="expectation", n2="query")),
)
)
def assert_logs_contain(instance, substring):
if not instance.contains_in_log(substring):
raise AssertionError("'{}' not found in logs".format(substring))
def assert_logs_contain_with_retry(instance, substring, retry_count=20, sleep_time=0.5):
for i in range(retry_count):
try:
@ -85,7 +137,10 @@ def assert_logs_contain_with_retry(instance, substring, retry_count=20, sleep_ti
else:
raise AssertionError("'{}' not found in logs".format(substring))
def exec_query_with_retry(instance, query, retry_count=40, sleep_time=0.5, silent=False, settings={}):
def exec_query_with_retry(
instance, query, retry_count=40, sleep_time=0.5, silent=False, settings={}
):
exception = None
for cnt in range(retry_count):
try:
@ -96,16 +151,21 @@ def exec_query_with_retry(instance, query, retry_count=40, sleep_time=0.5, silen
except Exception as ex:
exception = ex
if not silent:
logging.exception(f"Failed to execute query '{query}' on {cnt} try on instance '{instance.name}' will retry")
logging.exception(
f"Failed to execute query '{query}' on {cnt} try on instance '{instance.name}' will retry"
)
time.sleep(sleep_time)
else:
raise exception
def csv_compare(result, expected):
csv_result = TSV(result)
csv_expected = TSV(expected)
mismatch = []
max_len = len(csv_result) if len(csv_result) > len(csv_expected) else len(csv_expected)
max_len = (
len(csv_result) if len(csv_result) > len(csv_expected) else len(csv_expected)
)
for i in range(max_len):
if i >= len(csv_result):
mismatch.append("-[%d]=%s" % (i, csv_expected.lines[i]))
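# Hypothetical usage of the TSV helper above (values are illustrative; assumes
# __eq__ mirrors the __ne__ logic shown): a list of rows and a tab-separated
# string normalize to the same stripped lines, so they compare equal and
# diff() returns an empty list.
from helpers.test_tools import TSV

expected = TSV([[1, "a"], [2, "b"]])
actual = TSV("1\ta\n2\tb\n")
assert actual == expected
assert actual.diff(expected, n1="actual", n2="expected") == []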

View File

@ -8,30 +8,30 @@ sys.path.insert(0, os.path.join(CURDIR))
from . import uexpect
prompt = ':\) '
end_of_block = r'.*\r\n.*\r\n'
prompt = ":\) "
end_of_block = r".*\r\n.*\r\n"
class client(object):
def __init__(self, command=None, name='', log=None):
self.client = uexpect.spawn(['/bin/bash', '--noediting'])
def __init__(self, command=None, name="", log=None):
self.client = uexpect.spawn(["/bin/bash", "--noediting"])
if command is None:
command = '/usr/bin/clickhouse-client'
command = "/usr/bin/clickhouse-client"
self.client.command = command
self.client.eol('\r')
self.client.eol("\r")
self.client.logger(log, prefix=name)
self.client.timeout(20)
self.client.expect('[#\$] ', timeout=2)
self.client.expect("[#\$] ", timeout=2)
self.client.send(command)
def __enter__(self):
return self.client.__enter__()
def __exit__(self, type, value, traceback):
self.client.reader['kill_event'].set()
self.client.reader["kill_event"].set()
# send Ctrl-C
self.client.send('\x03', eol='')
self.client.send("\x03", eol="")
time.sleep(0.3)
self.client.send('quit', eol='\r')
self.client.send('\x03', eol='')
self.client.send("quit", eol="\r")
self.client.send("\x03", eol="")
return self.client.__exit__(type, value, traceback)

View File

@ -25,7 +25,7 @@ class TimeoutError(Exception):
self.timeout = timeout
def __str__(self):
return 'Timeout %.3fs' % float(self.timeout)
return "Timeout %.3fs" % float(self.timeout)
class ExpectTimeoutError(Exception):
@ -35,12 +35,12 @@ class ExpectTimeoutError(Exception):
self.buffer = buffer
def __str__(self):
s = 'Timeout %.3fs ' % float(self.timeout)
s = "Timeout %.3fs " % float(self.timeout)
if self.pattern:
s += 'for %s ' % repr(self.pattern.pattern)
s += "for %s " % repr(self.pattern.pattern)
if self.buffer:
s += 'buffer %s ' % repr(self.buffer[:])
s += 'or \'%s\'' % ','.join(['%x' % ord(c) for c in self.buffer[:]])
s += "buffer %s " % repr(self.buffer[:])
s += "or '%s'" % ",".join(["%x" % ord(c) for c in self.buffer[:]])
return s
@ -55,12 +55,12 @@ class IO(object):
TIMEOUT = Timeout
class Logger(object):
def __init__(self, logger, prefix=''):
def __init__(self, logger, prefix=""):
self._logger = logger
self._prefix = prefix
def write(self, data):
self._logger.write(('\n' + data).replace('\n', '\n' + self._prefix))
self._logger.write(("\n" + data).replace("\n", "\n" + self._prefix))
def flush(self):
self._logger.flush()
@ -77,7 +77,7 @@ class IO(object):
self.reader = reader
self._timeout = None
self._logger = None
self._eol = ''
self._eol = ""
def __enter__(self):
return self
@ -85,7 +85,7 @@ class IO(object):
def __exit__(self, type, value, traceback):
self.close()
def logger(self, logger=None, prefix=''):
def logger(self, logger=None, prefix=""):
if logger:
self._logger = self.Logger(logger, prefix=prefix)
return self._logger
@ -101,15 +101,15 @@ class IO(object):
return self._eol
def close(self, force=True):
self.reader['kill_event'].set()
os.system('pkill -TERM -P %d' % self.process.pid)
self.reader["kill_event"].set()
os.system("pkill -TERM -P %d" % self.process.pid)
if force:
self.process.kill()
else:
self.process.terminate()
os.close(self.master)
if self._logger:
self._logger.write('\n')
self._logger.write("\n")
self._logger.flush()
def send(self, data, eol=None):
@ -135,9 +135,9 @@ class IO(object):
if self.buffer is not None:
self.match = pattern.search(self.buffer, 0)
if self.match is not None:
self.after = self.buffer[self.match.start():self.match.end()]
self.before = self.buffer[:self.match.start()]
self.buffer = self.buffer[self.match.end():]
self.after = self.buffer[self.match.start() : self.match.end()]
self.before = self.buffer[: self.match.start()]
self.buffer = self.buffer[self.match.end() :]
break
if timeleft < 0:
break
@ -145,16 +145,16 @@ class IO(object):
data = self.read(timeout=timeleft, raise_exception=True)
except TimeoutError:
if self._logger:
self._logger.write((self.buffer or '') + '\n')
self._logger.write((self.buffer or "") + "\n")
self._logger.flush()
exception = ExpectTimeoutError(pattern, timeout, self.buffer)
self.buffer = None
raise exception
timeleft -= (time.time() - start_time)
timeleft -= time.time() - start_time
if data:
self.buffer = (self.buffer + data) if self.buffer else data
if self._logger:
self._logger.write((self.before or '') + (self.after or ''))
self._logger.write((self.before or "") + (self.after or ""))
self._logger.flush()
if self.match is None:
exception = ExpectTimeoutError(pattern, timeout, self.buffer)
@ -163,7 +163,7 @@ class IO(object):
return self.match
def read(self, timeout=0, raise_exception=False):
data = ''
data = ""
timeleft = timeout
try:
while timeleft >= 0:
@ -171,7 +171,7 @@ class IO(object):
data += self.queue.get(timeout=timeleft)
if data:
break
timeleft -= (time.time() - start_time)
timeleft -= time.time() - start_time
except Empty:
if data:
return data
@ -186,7 +186,14 @@ class IO(object):
def spawn(command):
master, slave = pty.openpty()
process = Popen(command, preexec_fn=os.setsid, stdout=slave, stdin=slave, stderr=slave, bufsize=1)
process = Popen(
command,
preexec_fn=os.setsid,
stdout=slave,
stdin=slave,
stderr=slave,
bufsize=1,
)
os.close(slave)
queue = Queue()
@ -195,14 +202,19 @@ def spawn(command):
thread.daemon = True
thread.start()
return IO(process, master, queue, reader={'thread': thread, 'kill_event': reader_kill_event})
return IO(
process,
master,
queue,
reader={"thread": thread, "kill_event": reader_kill_event},
)
def reader(process, out, queue, kill_event):
while True:
try:
# TODO: there are some issues with 1<<16 buffer size
data = os.read(out, 1<<17).decode(errors='replace')
data = os.read(out, 1 << 17).decode(errors="replace")
queue.put(data)
except:
if kill_event.is_set():

View File

@ -11,11 +11,13 @@ class SafeThread(threading.Thread):
super().__init__()
self.target = target
self.exception = None
def run(self):
try:
self.target()
except Exception as e: # pylint: disable=broad-except
except Exception as e: # pylint: disable=broad-except
self.exception = e
def join(self, timeout=None):
super().join(timeout)
if self.exception:
@ -24,7 +26,7 @@ class SafeThread(threading.Thread):
def random_string(length):
letters = string.ascii_letters
return ''.join(random.choice(letters) for i in range(length))
return "".join(random.choice(letters) for i in range(length))
def generate_values(date_str, count, sign=1):
@ -34,10 +36,10 @@ def generate_values(date_str, count, sign=1):
def replace_config(config_path, old, new):
config = open(config_path, 'r')
config = open(config_path, "r")
config_lines = config.readlines()
config.close()
config_lines = [line.replace(old, new) for line in config_lines]
config = open(config_path, 'w')
config = open(config_path, "w")
config.writelines(config_lines)
config.close()

View File

@ -19,14 +19,19 @@ from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node = cluster.add_instance('node', main_configs=[
'configs/no_system_log.xml',
'configs/asynchronous_metrics_update_period_s.xml',
], user_configs=[
'configs/users.d/overrides.xml',
])
node = cluster.add_instance(
"node",
main_configs=[
"configs/no_system_log.xml",
"configs/asynchronous_metrics_update_period_s.xml",
],
user_configs=[
"configs/users.d/overrides.xml",
],
)
@pytest.fixture(scope='module', autouse=True)
@pytest.fixture(scope="module", autouse=True)
def start_cluster():
try:
cluster.start()
@ -34,31 +39,39 @@ def start_cluster():
finally:
cluster.shutdown()
query_settings = {
'max_threads': 1,
'log_queries': 0,
"max_threads": 1,
"log_queries": 0,
}
sample_query = "SELECT groupArray(repeat('a', 1000)) FROM numbers(10000) GROUP BY number%10 FORMAT JSON"
def query(*args, **kwargs):
if 'settings' not in kwargs:
kwargs['settings'] = query_settings
if "settings" not in kwargs:
kwargs["settings"] = query_settings
else:
kwargs['settings'].update(query_settings)
kwargs["settings"].update(query_settings)
return node.query(*args, **kwargs)
def http_query(*args, **kwargs):
if 'params' not in kwargs:
kwargs['params'] = query_settings
if "params" not in kwargs:
kwargs["params"] = query_settings
else:
kwargs['params'].update(query_settings)
kwargs["params"].update(query_settings)
return node.http_query(*args, **kwargs)
def get_MemoryTracking():
return int(http_query("SELECT value FROM system.metrics WHERE metric = 'MemoryTracking'"))
return int(
http_query("SELECT value FROM system.metrics WHERE metric = 'MemoryTracking'")
)
def check_memory(memory):
# bytes -> megabytes
memory = [*map(lambda x: int(int(x)/1024/1024), memory)]
memory = [*map(lambda x: int(int(x) / 1024 / 1024), memory)]
# 3 changes to MemoryTracking is the minimum, since:
# - it is not so high that inaccuracies would go undetected
# - memory can go like X/X+N due to some background allocations
@ -66,14 +79,19 @@ def check_memory(memory):
changes_allowed = 3
# if the number of samples is large enough, use 10% of them
# (actually most of the time there will be only a few changes; 10% was chosen to avoid flakiness)
changes_allowed_auto=int(len(memory) * 0.1)
changes_allowed_auto = int(len(memory) * 0.1)
changes_allowed = max(changes_allowed_auto, changes_allowed)
changed=len(set(memory))
logging.info('Changes: allowed=%s, actual=%s, sample=%s',
changes_allowed, changed, len(memory))
changed = len(set(memory))
logging.info(
"Changes: allowed=%s, actual=%s, sample=%s",
changes_allowed,
changed,
len(memory),
)
assert changed < changes_allowed
def test_http():
memory = []
memory.append(get_MemoryTracking())
@ -82,6 +100,7 @@ def test_http():
memory.append(get_MemoryTracking())
check_memory(memory)
def test_tcp_multiple_sessions():
memory = []
memory.append(get_MemoryTracking())
@ -90,6 +109,7 @@ def test_tcp_multiple_sessions():
memory.append(get_MemoryTracking())
check_memory(memory)
def test_tcp_single_session():
memory = []
memory.append(get_MemoryTracking())
@ -97,9 +117,9 @@ def test_tcp_single_session():
sample_query,
"SELECT metric, value FROM system.metrics WHERE metric = 'MemoryTracking'",
] * 100
rows = query(';'.join(sample_queries))
memory = rows.split('\n')
memory = filter(lambda x: x.startswith('MemoryTracking'), memory)
memory = map(lambda x: x.split('\t')[1], memory)
rows = query(";".join(sample_queries))
memory = rows.split("\n")
memory = filter(lambda x: x.startswith("MemoryTracking"), memory)
memory = map(lambda x: x.split("\t")[1], memory)
memory = [*memory]
check_memory(memory)
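# Illustrative arithmetic for the check_memory() tolerance above (hypothetical
# readings, already reduced to megabytes to keep the sketch short):
samples = [100] * 30 + [101] * 10 + [102] * 10  # 50 readings, 3 distinct values
changes_allowed = max(int(len(samples) * 0.1), 3)  # 10% of 50 -> 5, above the floor of 3
changed = len(set(samples))  # 3 distinct MemoryTracking values
assert changed < changes_allowed  # 3 < 5, so the check would pass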

View File

@ -2,9 +2,15 @@ import pytest
from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
ch1 = cluster.add_instance('ch1', main_configs=["configs/config.d/clusters.xml"], with_zookeeper=True)
ch2 = cluster.add_instance('ch2', main_configs=["configs/config.d/clusters.xml"], with_zookeeper=True)
ch3 = cluster.add_instance('ch3', main_configs=["configs/config.d/clusters.xml"], with_zookeeper=True)
ch1 = cluster.add_instance(
"ch1", main_configs=["configs/config.d/clusters.xml"], with_zookeeper=True
)
ch2 = cluster.add_instance(
"ch2", main_configs=["configs/config.d/clusters.xml"], with_zookeeper=True
)
ch3 = cluster.add_instance(
"ch3", main_configs=["configs/config.d/clusters.xml"], with_zookeeper=True
)
@pytest.fixture(scope="module", autouse=True)
@ -18,17 +24,23 @@ def started_cluster():
def test_access_control_on_cluster():
ch1.query_with_retry("CREATE USER IF NOT EXISTS Alex ON CLUSTER 'cluster'", retry_count=5)
ch1.query_with_retry(
"CREATE USER IF NOT EXISTS Alex ON CLUSTER 'cluster'", retry_count=5
)
assert ch1.query("SHOW CREATE USER Alex") == "CREATE USER Alex\n"
assert ch2.query("SHOW CREATE USER Alex") == "CREATE USER Alex\n"
assert ch3.query("SHOW CREATE USER Alex") == "CREATE USER Alex\n"
ch2.query_with_retry("GRANT ON CLUSTER 'cluster' SELECT ON *.* TO Alex", retry_count=3)
ch2.query_with_retry(
"GRANT ON CLUSTER 'cluster' SELECT ON *.* TO Alex", retry_count=3
)
assert ch1.query("SHOW GRANTS FOR Alex") == "GRANT SELECT ON *.* TO Alex\n"
assert ch2.query("SHOW GRANTS FOR Alex") == "GRANT SELECT ON *.* TO Alex\n"
assert ch3.query("SHOW GRANTS FOR Alex") == "GRANT SELECT ON *.* TO Alex\n"
ch3.query_with_retry("REVOKE ON CLUSTER 'cluster' SELECT ON *.* FROM Alex", retry_count=3)
ch3.query_with_retry(
"REVOKE ON CLUSTER 'cluster' SELECT ON *.* FROM Alex", retry_count=3
)
assert ch1.query("SHOW GRANTS FOR Alex") == ""
assert ch2.query("SHOW GRANTS FOR Alex") == ""
assert ch3.query("SHOW GRANTS FOR Alex") == ""

View File

@ -3,7 +3,7 @@ import uuid
from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
instance = cluster.add_instance('instance', stay_alive=True)
instance = cluster.add_instance("instance", stay_alive=True)
@pytest.fixture(scope="module", autouse=True)
@ -21,21 +21,32 @@ def test_access_rights_for_function():
instance.query("CREATE USER A")
instance.query("CREATE USER B")
assert "it's necessary to have grant CREATE FUNCTION ON *.*" in instance.query_and_get_error(create_function_query, user = 'A')
assert (
"it's necessary to have grant CREATE FUNCTION ON *.*"
in instance.query_and_get_error(create_function_query, user="A")
)
instance.query("GRANT CREATE FUNCTION on *.* TO A")
instance.query(create_function_query, user = 'A')
instance.query(create_function_query, user="A")
assert instance.query("SELECT MySum(1, 2)") == "3\n"
assert "it's necessary to have grant DROP FUNCTION ON *.*" in instance.query_and_get_error("DROP FUNCTION MySum", user = 'B')
assert (
"it's necessary to have grant DROP FUNCTION ON *.*"
in instance.query_and_get_error("DROP FUNCTION MySum", user="B")
)
instance.query("GRANT DROP FUNCTION ON *.* TO B")
instance.query("DROP FUNCTION MySum", user = 'B')
assert "Unknown function MySum" in instance.query_and_get_error("SELECT MySum(1, 2)")
instance.query("DROP FUNCTION MySum", user="B")
assert "Unknown function MySum" in instance.query_and_get_error(
"SELECT MySum(1, 2)"
)
instance.query("REVOKE CREATE FUNCTION ON *.* FROM A")
assert "it's necessary to have grant CREATE FUNCTION ON *.*" in instance.query_and_get_error(create_function_query, user = 'A')
assert (
"it's necessary to have grant CREATE FUNCTION ON *.*"
in instance.query_and_get_error(create_function_query, user="A")
)
instance.query("DROP USER IF EXISTS A")
instance.query("DROP USER IF EXISTS B")
@ -45,13 +56,21 @@ def test_ignore_obsolete_grant_on_database():
instance.stop_clickhouse()
user_id = uuid.uuid4()
instance.exec_in_container(["bash", "-c" , f"""
instance.exec_in_container(
[
"bash",
"-c",
f"""
cat > /var/lib/clickhouse/access/{user_id}.sql << EOF
ATTACH USER X;
ATTACH GRANT CREATE FUNCTION, SELECT ON mydb.* TO X;
EOF"""])
EOF""",
]
)
instance.exec_in_container(["bash", "-c" , "touch /var/lib/clickhouse/access/need_rebuild_lists.mark"])
instance.exec_in_container(
["bash", "-c", "touch /var/lib/clickhouse/access/need_rebuild_lists.mark"]
)
instance.start_clickhouse()
assert instance.query("SHOW GRANTS FOR X") == "GRANT SELECT ON mydb.* TO X\n"

View File

@ -3,8 +3,8 @@ import pytest
from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1')
node2 = cluster.add_instance('node2')
node1 = cluster.add_instance("node1")
node2 = cluster.add_instance("node2")
@pytest.fixture(scope="module")
@ -14,10 +14,15 @@ def start_cluster():
for node in [node1, node2]:
node.query(
"create table da_memory_efficient_shard(A Int64, B Int64) Engine=MergeTree order by A partition by B % 2;")
"create table da_memory_efficient_shard(A Int64, B Int64) Engine=MergeTree order by A partition by B % 2;"
)
node1.query("insert into da_memory_efficient_shard select number, number from numbers(100000);")
node2.query("insert into da_memory_efficient_shard select number + 100000, number from numbers(100000);")
node1.query(
"insert into da_memory_efficient_shard select number, number from numbers(100000);"
)
node2.query(
"insert into da_memory_efficient_shard select number + 100000, number from numbers(100000);"
)
yield cluster
@ -27,23 +32,29 @@ def start_cluster():
def test_remote(start_cluster):
node1.query(
"set distributed_aggregation_memory_efficient = 1, group_by_two_level_threshold = 1, group_by_two_level_threshold_bytes=1")
"set distributed_aggregation_memory_efficient = 1, group_by_two_level_threshold = 1, group_by_two_level_threshold_bytes=1"
)
res = node1.query(
"select sum(a) from (SELECT B, uniqExact(A) a FROM remote('node{1,2}', default.da_memory_efficient_shard) GROUP BY B)")
assert res == '200000\n'
"select sum(a) from (SELECT B, uniqExact(A) a FROM remote('node{1,2}', default.da_memory_efficient_shard) GROUP BY B)"
)
assert res == "200000\n"
node1.query("set distributed_aggregation_memory_efficient = 0")
res = node1.query(
"select sum(a) from (SELECT B, uniqExact(A) a FROM remote('node{1,2}', default.da_memory_efficient_shard) GROUP BY B)")
assert res == '200000\n'
"select sum(a) from (SELECT B, uniqExact(A) a FROM remote('node{1,2}', default.da_memory_efficient_shard) GROUP BY B)"
)
assert res == "200000\n"
node1.query(
"set distributed_aggregation_memory_efficient = 1, group_by_two_level_threshold = 1, group_by_two_level_threshold_bytes=1")
"set distributed_aggregation_memory_efficient = 1, group_by_two_level_threshold = 1, group_by_two_level_threshold_bytes=1"
)
res = node1.query(
"SELECT fullHostName() AS h, uniqExact(A) AS a FROM remote('node{1,2}', default.da_memory_efficient_shard) GROUP BY h ORDER BY h;")
assert res == 'node1\t100000\nnode2\t100000\n'
"SELECT fullHostName() AS h, uniqExact(A) AS a FROM remote('node{1,2}', default.da_memory_efficient_shard) GROUP BY h ORDER BY h;"
)
assert res == "node1\t100000\nnode2\t100000\n"
node1.query("set distributed_aggregation_memory_efficient = 0")
res = node1.query(
"SELECT fullHostName() AS h, uniqExact(A) AS a FROM remote('node{1,2}', default.da_memory_efficient_shard) GROUP BY h ORDER BY h;")
assert res == 'node1\t100000\nnode2\t100000\n'
"SELECT fullHostName() AS h, uniqExact(A) AS a FROM remote('node{1,2}', default.da_memory_efficient_shard) GROUP BY h ORDER BY h;"
)
assert res == "node1\t100000\nnode2\t100000\n"

View File

@ -2,31 +2,42 @@ import pytest
from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
server = cluster.add_instance('server', user_configs=["configs/users.d/network.xml"])
server = cluster.add_instance("server", user_configs=["configs/users.d/network.xml"])
clientA1 = cluster.add_instance('clientA1', hostname='clientA1.com')
clientA2 = cluster.add_instance('clientA2', hostname='clientA2.com')
clientA3 = cluster.add_instance('clientA3', hostname='clientA3.com')
clientB1 = cluster.add_instance('clientB1', hostname='clientB001.ru')
clientB2 = cluster.add_instance('clientB2', hostname='clientB002.ru')
clientB3 = cluster.add_instance('clientB3', hostname='xxx.clientB003.rutracker.com')
clientC1 = cluster.add_instance('clientC1', hostname='clientC01.ru')
clientC2 = cluster.add_instance('clientC2', hostname='xxx.clientC02.ru')
clientC3 = cluster.add_instance('clientC3', hostname='xxx.clientC03.rutracker.com')
clientD1 = cluster.add_instance('clientD1', hostname='clientD0001.ru')
clientD2 = cluster.add_instance('clientD2', hostname='xxx.clientD0002.ru')
clientD3 = cluster.add_instance('clientD3', hostname='clientD0003.ru')
clientA1 = cluster.add_instance("clientA1", hostname="clientA1.com")
clientA2 = cluster.add_instance("clientA2", hostname="clientA2.com")
clientA3 = cluster.add_instance("clientA3", hostname="clientA3.com")
clientB1 = cluster.add_instance("clientB1", hostname="clientB001.ru")
clientB2 = cluster.add_instance("clientB2", hostname="clientB002.ru")
clientB3 = cluster.add_instance("clientB3", hostname="xxx.clientB003.rutracker.com")
clientC1 = cluster.add_instance("clientC1", hostname="clientC01.ru")
clientC2 = cluster.add_instance("clientC2", hostname="xxx.clientC02.ru")
clientC3 = cluster.add_instance("clientC3", hostname="xxx.clientC03.rutracker.com")
clientD1 = cluster.add_instance("clientD1", hostname="clientD0001.ru")
clientD2 = cluster.add_instance("clientD2", hostname="xxx.clientD0002.ru")
clientD3 = cluster.add_instance("clientD3", hostname="clientD0003.ru")
def check_clickhouse_is_ok(client_node, server_node):
assert client_node.exec_in_container(
["bash", "-c", "/usr/bin/curl -s {}:8123 ".format(server_node.hostname)]) == "Ok.\n"
assert (
client_node.exec_in_container(
["bash", "-c", "/usr/bin/curl -s {}:8123 ".format(server_node.hostname)]
)
== "Ok.\n"
)
def query_from_one_node_to_another(client_node, server_node, query):
check_clickhouse_is_ok(client_node, server_node)
return client_node.exec_in_container(
["bash", "-c", "/usr/bin/clickhouse client --host {} --query {!r}".format(server_node.hostname, query)])
[
"bash",
"-c",
"/usr/bin/clickhouse client --host {} --query {!r}".format(
server_node.hostname, query
),
]
)
def query(node, query):
@ -38,7 +49,10 @@ def setup_nodes():
try:
cluster.start()
query(server, "DROP TABLE IF EXISTS test_allowed_client_hosts")
query(server, "CREATE TABLE test_allowed_client_hosts (x Int32) ENGINE = MergeTree() ORDER BY tuple()")
query(
server,
"CREATE TABLE test_allowed_client_hosts (x Int32) ENGINE = MergeTree() ORDER BY tuple()",
)
query(server, "INSERT INTO test_allowed_client_hosts VALUES (5)")
yield cluster
@ -58,8 +72,15 @@ def test_allowed_host():
# expected_to_fail.extend([clientC3, clientD2])
for client_node in expected_to_pass:
assert query_from_one_node_to_another(client_node, server, "SELECT * FROM test_allowed_client_hosts") == "5\n"
assert (
query_from_one_node_to_another(
client_node, server, "SELECT * FROM test_allowed_client_hosts"
)
== "5\n"
)
for client_node in expected_to_fail:
with pytest.raises(Exception, match=r'default: Authentication failed'):
query_from_one_node_to_another(client_node, server, "SELECT * FROM test_allowed_client_hosts")
with pytest.raises(Exception, match=r"default: Authentication failed"):
query_from_one_node_to_another(
client_node, server, "SELECT * FROM test_allowed_client_hosts"
)

View File

@ -2,13 +2,23 @@ import pytest
from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', main_configs=['configs/config_with_hosts.xml'])
node2 = cluster.add_instance('node2', main_configs=['configs/config_with_only_primary_hosts.xml'])
node3 = cluster.add_instance('node3', main_configs=['configs/config_with_only_regexp_hosts.xml'])
node4 = cluster.add_instance('node4', main_configs=[]) # No `remote_url_allow_hosts` at all.
node5 = cluster.add_instance('node5', main_configs=['configs/config_without_allowed_hosts.xml'])
node6 = cluster.add_instance('node6', main_configs=['configs/config_for_remote.xml'])
node7 = cluster.add_instance('node7', main_configs=['configs/config_for_redirect.xml'], with_hdfs=True)
node1 = cluster.add_instance("node1", main_configs=["configs/config_with_hosts.xml"])
node2 = cluster.add_instance(
"node2", main_configs=["configs/config_with_only_primary_hosts.xml"]
)
node3 = cluster.add_instance(
"node3", main_configs=["configs/config_with_only_regexp_hosts.xml"]
)
node4 = cluster.add_instance(
"node4", main_configs=[]
) # No `remote_url_allow_hosts` at all.
node5 = cluster.add_instance(
"node5", main_configs=["configs/config_without_allowed_hosts.xml"]
)
node6 = cluster.add_instance("node6", main_configs=["configs/config_for_remote.xml"])
node7 = cluster.add_instance(
"node7", main_configs=["configs/config_for_redirect.xml"], with_hdfs=True
)
@pytest.fixture(scope="module")
@ -21,97 +31,229 @@ def start_cluster():
def test_config_with_hosts(start_cluster):
assert node1.query("CREATE TABLE table_test_1_1 (word String) Engine=URL('http://host:80', HDFS)") == ""
assert node1.query("CREATE TABLE table_test_1_2 (word String) Engine=URL('https://yandex.ru', CSV)") == ""
assert (
node1.query(
"CREATE TABLE table_test_1_1 (word String) Engine=URL('http://host:80', HDFS)"
)
== ""
)
assert (
node1.query(
"CREATE TABLE table_test_1_2 (word String) Engine=URL('https://yandex.ru', CSV)"
)
== ""
)
assert "not allowed" in node1.query_and_get_error(
"CREATE TABLE table_test_1_4 (word String) Engine=URL('https://host:123', S3)")
"CREATE TABLE table_test_1_4 (word String) Engine=URL('https://host:123', S3)"
)
assert "not allowed" in node1.query_and_get_error(
"CREATE TABLE table_test_1_4 (word String) Engine=URL('https://yandex2.ru', CSV)")
"CREATE TABLE table_test_1_4 (word String) Engine=URL('https://yandex2.ru', CSV)"
)
def test_config_with_only_primary_hosts(start_cluster):
assert node2.query("CREATE TABLE table_test_2_1 (word String) Engine=URL('https://host:80', CSV)") == ""
assert node2.query("CREATE TABLE table_test_2_2 (word String) Engine=URL('https://host:123', S3)") == ""
assert node2.query("CREATE TABLE table_test_2_3 (word String) Engine=URL('https://yandex.ru', CSV)") == ""
assert node2.query("CREATE TABLE table_test_2_4 (word String) Engine=URL('https://yandex.ru:87', HDFS)") == ""
assert (
node2.query(
"CREATE TABLE table_test_2_1 (word String) Engine=URL('https://host:80', CSV)"
)
== ""
)
assert (
node2.query(
"CREATE TABLE table_test_2_2 (word String) Engine=URL('https://host:123', S3)"
)
== ""
)
assert (
node2.query(
"CREATE TABLE table_test_2_3 (word String) Engine=URL('https://yandex.ru', CSV)"
)
== ""
)
assert (
node2.query(
"CREATE TABLE table_test_2_4 (word String) Engine=URL('https://yandex.ru:87', HDFS)"
)
== ""
)
assert "not allowed" in node2.query_and_get_error(
"CREATE TABLE table_test_2_5 (word String) Engine=URL('https://host', HDFS)")
"CREATE TABLE table_test_2_5 (word String) Engine=URL('https://host', HDFS)"
)
assert "not allowed" in node2.query_and_get_error(
"CREATE TABLE table_test_2_5 (word String) Engine=URL('https://host:234', CSV)")
"CREATE TABLE table_test_2_5 (word String) Engine=URL('https://host:234', CSV)"
)
assert "not allowed" in node2.query_and_get_error(
"CREATE TABLE table_test_2_6 (word String) Engine=URL('https://yandex2.ru', S3)")
"CREATE TABLE table_test_2_6 (word String) Engine=URL('https://yandex2.ru', S3)"
)
def test_config_with_only_regexp_hosts(start_cluster):
assert node3.query("CREATE TABLE table_test_3_1 (word String) Engine=URL('https://host:80', HDFS)") == ""
assert node3.query("CREATE TABLE table_test_3_2 (word String) Engine=URL('https://yandex.ru', CSV)") == ""
assert (
node3.query(
"CREATE TABLE table_test_3_1 (word String) Engine=URL('https://host:80', HDFS)"
)
== ""
)
assert (
node3.query(
"CREATE TABLE table_test_3_2 (word String) Engine=URL('https://yandex.ru', CSV)"
)
== ""
)
assert "not allowed" in node3.query_and_get_error(
"CREATE TABLE table_test_3_3 (word String) Engine=URL('https://host', CSV)")
"CREATE TABLE table_test_3_3 (word String) Engine=URL('https://host', CSV)"
)
assert "not allowed" in node3.query_and_get_error(
"CREATE TABLE table_test_3_4 (word String) Engine=URL('https://yandex2.ru', S3)")
"CREATE TABLE table_test_3_4 (word String) Engine=URL('https://yandex2.ru', S3)"
)
def test_config_without_allowed_hosts_section(start_cluster):
assert node4.query("CREATE TABLE table_test_4_1 (word String) Engine=URL('https://host:80', CSV)") == ""
assert node4.query("CREATE TABLE table_test_4_2 (word String) Engine=S3('https://host:80/bucket/key', CSV)") == ""
assert node4.query("CREATE TABLE table_test_4_3 (word String) Engine=URL('https://host', HDFS)") == ""
assert node4.query("CREATE TABLE table_test_4_4 (word String) Engine=URL('https://yandex.ru', CSV)") == ""
assert node4.query("CREATE TABLE table_test_4_5 (word String) Engine=URL('ftp://something.com', S3)") == ""
assert (
node4.query(
"CREATE TABLE table_test_4_1 (word String) Engine=URL('https://host:80', CSV)"
)
== ""
)
assert (
node4.query(
"CREATE TABLE table_test_4_2 (word String) Engine=S3('https://host:80/bucket/key', CSV)"
)
== ""
)
assert (
node4.query(
"CREATE TABLE table_test_4_3 (word String) Engine=URL('https://host', HDFS)"
)
== ""
)
assert (
node4.query(
"CREATE TABLE table_test_4_4 (word String) Engine=URL('https://yandex.ru', CSV)"
)
== ""
)
assert (
node4.query(
"CREATE TABLE table_test_4_5 (word String) Engine=URL('ftp://something.com', S3)"
)
== ""
)
def test_config_without_allowed_hosts(start_cluster):
assert "not allowed" in node5.query_and_get_error(
"CREATE TABLE table_test_5_1 (word String) Engine=URL('https://host:80', CSV)")
"CREATE TABLE table_test_5_1 (word String) Engine=URL('https://host:80', CSV)"
)
assert "not allowed" in node5.query_and_get_error(
"CREATE TABLE table_test_5_2 (word String) Engine=S3('https://host:80/bucket/key', CSV)")
"CREATE TABLE table_test_5_2 (word String) Engine=S3('https://host:80/bucket/key', CSV)"
)
assert "not allowed" in node5.query_and_get_error(
"CREATE TABLE table_test_5_3 (word String) Engine=URL('https://host', HDFS)")
"CREATE TABLE table_test_5_3 (word String) Engine=URL('https://host', HDFS)"
)
assert "not allowed" in node5.query_and_get_error(
"CREATE TABLE table_test_5_4 (word String) Engine=URL('https://yandex.ru', CSV)")
"CREATE TABLE table_test_5_4 (word String) Engine=URL('https://yandex.ru', CSV)"
)
assert "not allowed" in node5.query_and_get_error(
"CREATE TABLE table_test_5_5 (word String) Engine=URL('ftp://something.com', S3)")
"CREATE TABLE table_test_5_5 (word String) Engine=URL('ftp://something.com', S3)"
)
def test_table_function_remote(start_cluster):
assert "not allowed in configuration file" not in node6.query_and_get_error(
"SELECT * FROM remoteSecure('example01-01-{1|2}', system, events)",
settings={"connections_with_failover_max_tries": 1, "connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout": 1})
settings={
"connections_with_failover_max_tries": 1,
"connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000,
"connect_timeout": 1,
"send_timeout": 1,
},
)
assert "not allowed in configuration file" not in node6.query_and_get_error(
"SELECT * FROM remoteSecure('example01-01-1,example01-02-1', system, events)",
settings={"connections_with_failover_max_tries": 1, "connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout": 1})
settings={
"connections_with_failover_max_tries": 1,
"connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000,
"connect_timeout": 1,
"send_timeout": 1,
},
)
assert "not allowed in configuration file" not in node6.query_and_get_error(
"SELECT * FROM remote('example01-0{1,2}-1', system, events",
settings={"connections_with_failover_max_tries": 1, "connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout": 1})
settings={
"connections_with_failover_max_tries": 1,
"connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000,
"connect_timeout": 1,
"send_timeout": 1,
},
)
assert "not allowed in configuration file" not in node6.query_and_get_error(
"SELECT * FROM remote('example01-0{1,2}-{1|2}', system, events)",
settings={"connections_with_failover_max_tries": 1, "connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout": 1})
settings={
"connections_with_failover_max_tries": 1,
"connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000,
"connect_timeout": 1,
"send_timeout": 1,
},
)
assert "not allowed in configuration file" not in node6.query_and_get_error(
"SELECT * FROM remoteSecure('example01-{01..02}-{1|2}', system, events)",
settings={"connections_with_failover_max_tries": 1, "connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout": 1})
settings={
"connections_with_failover_max_tries": 1,
"connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000,
"connect_timeout": 1,
"send_timeout": 1,
},
)
assert "not allowed" in node6.query_and_get_error(
"SELECT * FROM remoteSecure('example01-01-1,example01-03-1', system, events)",
settings={"connections_with_failover_max_tries": 1, "connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout": 1})
assert "not allowed" in node6.query_and_get_error("SELECT * FROM remote('example01-01-{1|3}', system, events)",
settings={"connections_with_failover_max_tries": 1,
"connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000,
"connect_timeout": 1, "send_timeout": 1})
settings={
"connections_with_failover_max_tries": 1,
"connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000,
"connect_timeout": 1,
"send_timeout": 1,
},
)
assert "not allowed" in node6.query_and_get_error(
"SELECT * FROM remote('example01-01-{1|3}', system, events)",
settings={
"connections_with_failover_max_tries": 1,
"connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000,
"connect_timeout": 1,
"send_timeout": 1,
},
)
assert "not allowed" in node6.query_and_get_error(
"SELECT * FROM remoteSecure('example01-0{1,3}-1', system, metrics)",
settings={"connections_with_failover_max_tries": 1, "connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout": 1})
settings={
"connections_with_failover_max_tries": 1,
"connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000,
"connect_timeout": 1,
"send_timeout": 1,
},
)
assert node6.query("SELECT * FROM remote('localhost', system, events)") != ""
assert node6.query("SELECT * FROM remoteSecure('localhost', system, metrics)") != ""
assert "URL \"localhost:800\" is not allowed in configuration file" in node6.query_and_get_error(
"SELECT * FROM remoteSecure('localhost:800', system, events)")
assert "URL \"localhost:800\" is not allowed in configuration file" in node6.query_and_get_error(
"SELECT * FROM remote('localhost:800', system, metrics)")
assert (
'URL "localhost:800" is not allowed in configuration file'
in node6.query_and_get_error(
"SELECT * FROM remoteSecure('localhost:800', system, events)"
)
)
assert (
'URL "localhost:800" is not allowed in configuration file'
in node6.query_and_get_error(
"SELECT * FROM remote('localhost:800', system, metrics)"
)
)
def test_redirect(start_cluster):
@ -120,12 +262,17 @@ def test_redirect(start_cluster):
hdfs_api.write_data("/simple_storage", "1\t\n")
assert hdfs_api.read_data("/simple_storage") == "1\t\n"
node7.query(
"CREATE TABLE table_test_7_1 (word String) ENGINE=URL('http://hdfs1:50070/webhdfs/v1/simple_storage?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', CSV)")
assert "not allowed" in node7.query_and_get_error("SET max_http_get_redirects=1; SELECT * from table_test_7_1")
"CREATE TABLE table_test_7_1 (word String) ENGINE=URL('http://hdfs1:50070/webhdfs/v1/simple_storage?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', CSV)"
)
assert "not allowed" in node7.query_and_get_error(
"SET max_http_get_redirects=1; SELECT * from table_test_7_1"
)
def test_HDFS(start_cluster):
assert "not allowed" in node7.query_and_get_error(
"CREATE TABLE table_test_7_2 (word String) ENGINE=HDFS('http://hdfs1:50075/webhdfs/v1/simple_storage?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', 'CSV')")
"CREATE TABLE table_test_7_2 (word String) ENGINE=HDFS('http://hdfs1:50075/webhdfs/v1/simple_storage?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', 'CSV')"
)
assert "not allowed" in node7.query_and_get_error(
"SELECT * FROM hdfs('http://hdfs1:50075/webhdfs/v1/simple_storage?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', 'TSV', 'word String')")
"SELECT * FROM hdfs('http://hdfs1:50075/webhdfs/v1/simple_storage?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', 'TSV', 'word String')"
)

View File

@ -4,8 +4,7 @@ from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1',
main_configs=['configs/logs_config.xml'])
node1 = cluster.add_instance("node1", main_configs=["configs/logs_config.xml"])
@pytest.fixture(scope="module")
@ -21,30 +20,60 @@ def started_cluster():
def test_alter_codec_pk(started_cluster):
try:
name = "test_alter_codec_pk"
node1.query("""
node1.query(
"""
CREATE TABLE {name} (id UInt64, value UInt64) Engine=MergeTree() ORDER BY id
""".format(name=name))
""".format(
name=name
)
)
node1.query("INSERT INTO {name} SELECT number, number * number from numbers(100)".format(name=name))
node1.query(
"INSERT INTO {name} SELECT number, number * number from numbers(100)".format(
name=name
)
)
node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt64 CODEC(NONE)".format(name=name))
node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt64 CODEC(Delta, LZ4)".format(name=name))
node1.query(
"ALTER TABLE {name} MODIFY COLUMN id UInt64 CODEC(NONE)".format(name=name)
)
node1.query(
"ALTER TABLE {name} MODIFY COLUMN id UInt64 CODEC(Delta, LZ4)".format(
name=name
)
)
assert node1.query("SELECT sum(id) FROM {name}".format(name=name)) == "4950\n"
with pytest.raises(QueryRuntimeException):
node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt32 CODEC(Delta, LZ4)".format(name=name))
node1.query(
"ALTER TABLE {name} MODIFY COLUMN id UInt32 CODEC(Delta, LZ4)".format(
name=name
)
)
node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt64 DEFAULT 3 CODEC(Delta, LZ4)".format(name=name))
node1.query(
"ALTER TABLE {name} MODIFY COLUMN id UInt64 DEFAULT 3 CODEC(Delta, LZ4)".format(
name=name
)
)
node1.query("INSERT INTO {name} (value) VALUES (1)".format(name=name))
assert node1.query("SELECT sum(id) FROM {name}".format(name=name)) == "4953\n"
with pytest.raises(QueryRuntimeException):
node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt64 ALIAS 3 CODEC(Delta, LZ4)".format(name=name))
node1.query(
"ALTER TABLE {name} MODIFY COLUMN id UInt64 ALIAS 3 CODEC(Delta, LZ4)".format(
name=name
)
)
node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt64 MATERIALIZED 3 CODEC(Delta, LZ4)".format(name=name))
node1.query(
"ALTER TABLE {name} MODIFY COLUMN id UInt64 MATERIALIZED 3 CODEC(Delta, LZ4)".format(
name=name
)
)
node1.query("INSERT INTO {name} (value) VALUES (1)".format(name=name))
@ -61,28 +90,58 @@ def test_alter_codec_pk(started_cluster):
def test_alter_codec_index(started_cluster):
try:
name = "test_alter_codec_index"
node1.query("""
node1.query(
"""
CREATE TABLE {name} (`id` UInt64, value UInt64, INDEX id_index id TYPE minmax GRANULARITY 1) Engine=MergeTree() ORDER BY tuple()
""".format(name=name))
""".format(
name=name
)
)
node1.query("INSERT INTO {name} SELECT number, number * number from numbers(100)".format(name=name))
node1.query(
"INSERT INTO {name} SELECT number, number * number from numbers(100)".format(
name=name
)
)
node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt64 CODEC(NONE)".format(name=name))
node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt64 CODEC(Delta, LZ4)".format(name=name))
node1.query(
"ALTER TABLE {name} MODIFY COLUMN id UInt64 CODEC(NONE)".format(name=name)
)
node1.query(
"ALTER TABLE {name} MODIFY COLUMN id UInt64 CODEC(Delta, LZ4)".format(
name=name
)
)
with pytest.raises(QueryRuntimeException):
node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt32 CODEC(Delta, LZ4)".format(name=name))
node1.query(
"ALTER TABLE {name} MODIFY COLUMN id UInt32 CODEC(Delta, LZ4)".format(
name=name
)
)
node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt64 DEFAULT 3 CODEC(Delta, LZ4)".format(name=name))
node1.query(
"ALTER TABLE {name} MODIFY COLUMN id UInt64 DEFAULT 3 CODEC(Delta, LZ4)".format(
name=name
)
)
node1.query("INSERT INTO {name} (value) VALUES (1)".format(name=name))
assert node1.query("SELECT sum(id) FROM {name}".format(name=name)) == "4953\n"
with pytest.raises(QueryRuntimeException):
node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt64 ALIAS 3 CODEC(Delta, LZ4)".format(name=name))
node1.query(
"ALTER TABLE {name} MODIFY COLUMN id UInt64 ALIAS 3 CODEC(Delta, LZ4)".format(
name=name
)
)
node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt64 MATERIALIZED 3 CODEC(Delta, LZ4)".format(name=name))
node1.query(
"ALTER TABLE {name} MODIFY COLUMN id UInt64 MATERIALIZED 3 CODEC(Delta, LZ4)".format(
name=name
)
)
node1.query("INSERT INTO {name} (value) VALUES (1)".format(name=name))

View File

@ -4,11 +4,18 @@ from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', main_configs=['configs/remote_servers.xml'], with_zookeeper=True)
node2 = cluster.add_instance('node2', main_configs=['configs/remote_servers.xml'], with_zookeeper=True)
node3 = cluster.add_instance('node3', main_configs=['configs/remote_servers.xml'], with_zookeeper=True)
node4 = cluster.add_instance('node4', main_configs=['configs/remote_servers.xml'], with_zookeeper=True)
node1 = cluster.add_instance(
"node1", main_configs=["configs/remote_servers.xml"], with_zookeeper=True
)
node2 = cluster.add_instance(
"node2", main_configs=["configs/remote_servers.xml"], with_zookeeper=True
)
node3 = cluster.add_instance(
"node3", main_configs=["configs/remote_servers.xml"], with_zookeeper=True
)
node4 = cluster.add_instance(
"node4", main_configs=["configs/remote_servers.xml"], with_zookeeper=True
)
@pytest.fixture(scope="module")
@ -17,19 +24,31 @@ def started_cluster():
cluster.start()
for node in [node1, node2]:
node.query_with_retry('''
node.query_with_retry(
"""
CREATE TABLE IF NOT EXISTS test_table_replicated(date Date, id UInt32, value Int32)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/sometable', '{replica}') ORDER BY id;
'''.format(replica=node.name))
node.query_with_retry('''CREATE TABLE IF NOT EXISTS test_table(date Date, id UInt32, value Int32) ENGINE=MergeTree ORDER BY id''')
""".format(
replica=node.name
)
)
node.query_with_retry(
"""CREATE TABLE IF NOT EXISTS test_table(date Date, id UInt32, value Int32) ENGINE=MergeTree ORDER BY id"""
)
for node in [node3, node4]:
node.query_with_retry('''
node.query_with_retry(
"""
CREATE TABLE IF NOT EXISTS test_table_replicated(date Date, id UInt32, value Int32)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/1/someotable', '{replica}') ORDER BY id;
'''.format(replica=node.name))
""".format(
replica=node.name
)
)
node.query_with_retry('''CREATE TABLE IF NOT EXISTS test_table(date Date, id UInt32, value Int32) ENGINE=MergeTree ORDER BY id''')
node.query_with_retry(
"""CREATE TABLE IF NOT EXISTS test_table(date Date, id UInt32, value Int32) ENGINE=MergeTree ORDER BY id"""
)
yield cluster
@ -46,17 +65,23 @@ def test_alter_on_cluter_non_replicated(started_cluster):
assert node3.query("SELECT COUNT() FROM test_table") == "1\n"
assert node4.query("SELECT COUNT() FROM test_table") == "1\n"
node1.query("ALTER TABLE test_table ON CLUSTER 'test_cluster_mixed' MODIFY COLUMN date DateTime")
node1.query(
"ALTER TABLE test_table ON CLUSTER 'test_cluster_mixed' MODIFY COLUMN date DateTime"
)
assert node1.query("SELECT date FROM test_table") == '2019-10-01 00:00:00\n'
assert node2.query("SELECT date FROM test_table") == '2019-10-01 00:00:00\n'
assert node3.query("SELECT date FROM test_table") == '2019-10-01 00:00:00\n'
assert node4.query("SELECT date FROM test_table") == '2019-10-01 00:00:00\n'
assert node1.query("SELECT date FROM test_table") == "2019-10-01 00:00:00\n"
assert node2.query("SELECT date FROM test_table") == "2019-10-01 00:00:00\n"
assert node3.query("SELECT date FROM test_table") == "2019-10-01 00:00:00\n"
assert node4.query("SELECT date FROM test_table") == "2019-10-01 00:00:00\n"
node3.query("ALTER TABLE test_table ON CLUSTER 'test_cluster_mixed' MODIFY COLUMN value String")
node3.query(
"ALTER TABLE test_table ON CLUSTER 'test_cluster_mixed' MODIFY COLUMN value String"
)
for node in [node1, node2, node3, node4]:
node.query("INSERT INTO test_table VALUES(toDateTime('2019-10-02 00:00:00'), 2, 'Hello')")
node.query(
"INSERT INTO test_table VALUES(toDateTime('2019-10-02 00:00:00'), 2, 'Hello')"
)
assert node1.query("SELECT COUNT() FROM test_table") == "2\n"
assert node2.query("SELECT COUNT() FROM test_table") == "2\n"
@ -66,22 +91,40 @@ def test_alter_on_cluter_non_replicated(started_cluster):
def test_alter_replicated_on_cluster(started_cluster):
for node in [node1, node3]:
node.query("INSERT INTO test_table_replicated VALUES(toDate('2019-10-01'), 1, 1)")
node.query(
"INSERT INTO test_table_replicated VALUES(toDate('2019-10-01'), 1, 1)"
)
for node in [node2, node4]:
node.query("SYSTEM SYNC REPLICA test_table_replicated", timeout=20)
node1.query("ALTER TABLE test_table_replicated ON CLUSTER 'test_cluster_mixed' MODIFY COLUMN date DateTime", settings={"replication_alter_partitions_sync": "2"})
node1.query(
"ALTER TABLE test_table_replicated ON CLUSTER 'test_cluster_mixed' MODIFY COLUMN date DateTime",
settings={"replication_alter_partitions_sync": "2"},
)
assert node1.query("SELECT date FROM test_table_replicated") == '2019-10-01 00:00:00\n'
assert node2.query("SELECT date FROM test_table_replicated") == '2019-10-01 00:00:00\n'
assert node3.query("SELECT date FROM test_table_replicated") == '2019-10-01 00:00:00\n'
assert node4.query("SELECT date FROM test_table_replicated") == '2019-10-01 00:00:00\n'
assert (
node1.query("SELECT date FROM test_table_replicated") == "2019-10-01 00:00:00\n"
)
assert (
node2.query("SELECT date FROM test_table_replicated") == "2019-10-01 00:00:00\n"
)
assert (
node3.query("SELECT date FROM test_table_replicated") == "2019-10-01 00:00:00\n"
)
assert (
node4.query("SELECT date FROM test_table_replicated") == "2019-10-01 00:00:00\n"
)
node3.query_with_retry("ALTER TABLE test_table_replicated ON CLUSTER 'test_cluster_mixed' MODIFY COLUMN value String", settings={"replication_alter_partitions_sync": "2"})
node3.query_with_retry(
"ALTER TABLE test_table_replicated ON CLUSTER 'test_cluster_mixed' MODIFY COLUMN value String",
settings={"replication_alter_partitions_sync": "2"},
)
for node in [node2, node4]:
node.query("INSERT INTO test_table_replicated VALUES(toDateTime('2019-10-02 00:00:00'), 2, 'Hello')")
node.query(
"INSERT INTO test_table_replicated VALUES(toDateTime('2019-10-02 00:00:00'), 2, 'Hello')"
)
for node in [node1, node3]:
node.query("SYSTEM SYNC REPLICA test_table_replicated", timeout=20)

View File

@ -3,7 +3,10 @@ from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', user_configs=['configs/users.xml'], with_zookeeper=True)
node1 = cluster.add_instance(
"node1", user_configs=["configs/users.xml"], with_zookeeper=True
)
@pytest.fixture(scope="module")
def started_cluster():
@ -13,24 +16,30 @@ def started_cluster():
finally:
cluster.shutdown()
def test_cast_keep_nullable(started_cluster):
setting = node1.query("SELECT value FROM system.settings WHERE name='cast_keep_nullable'")
assert(setting.strip() == "1")
result = node1.query("""
def test_cast_keep_nullable(started_cluster):
setting = node1.query(
"SELECT value FROM system.settings WHERE name='cast_keep_nullable'"
)
assert setting.strip() == "1"
result = node1.query(
"""
DROP TABLE IF EXISTS t;
CREATE TABLE t (x UInt64) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO t SELECT number FROM numbers(10);
SELECT * FROM t;
""")
assert(result.strip() == "0\n1\n2\n3\n4\n5\n6\n7\n8\n9")
"""
)
assert result.strip() == "0\n1\n2\n3\n4\n5\n6\n7\n8\n9"
error = node1.query_and_get_error("""
error = node1.query_and_get_error(
"""
SET mutations_sync = 1;
ALTER TABLE t UPDATE x = x % 3 = 0 ? NULL : x WHERE x % 2 = 1; 
""")
assert("DB::Exception: Cannot convert NULL value to non-Nullable type" in error)
"""
)
assert "DB::Exception: Cannot convert NULL value to non-Nullable type" in error
result = node1.query("SELECT * FROM t;")
assert(result.strip() == "0\n1\n2\n3\n4\n5\n6\n7\n8\n9")
assert result.strip() == "0\n1\n2\n3\n4\n5\n6\n7\n8\n9"

View File

@ -6,8 +6,8 @@ from helpers.test_tools import assert_eq_with_retry
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', with_zookeeper=True)
node2 = cluster.add_instance('node2', with_zookeeper=True)
node1 = cluster.add_instance("node1", with_zookeeper=True)
node2 = cluster.add_instance("node2", with_zookeeper=True)
@pytest.fixture(scope="module")
@ -22,21 +22,25 @@ def started_cluster():
def test_replica_always_download(started_cluster):
node1.query_with_retry("""
node1.query_with_retry(
"""
CREATE TABLE IF NOT EXISTS test_table(
key UInt64,
value String
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_table/replicated', '1')
ORDER BY tuple()
""")
node2.query_with_retry("""
"""
)
node2.query_with_retry(
"""
CREATE TABLE IF NOT EXISTS test_table(
key UInt64,
value String
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_table/replicated', '2')
ORDER BY tuple()
SETTINGS always_fetch_merged_part=1
""")
"""
)
# Stop merges on single node
node1.query("SYSTEM STOP MERGES")
@ -50,15 +54,29 @@ def test_replica_always_download(started_cluster):
time.sleep(5)
# Nothing is merged
assert node1.query("SELECT COUNT() FROM system.parts WHERE table = 'test_table' and active=1") == "10\n"
assert node2.query("SELECT COUNT() FROM system.parts WHERE table = 'test_table' and active=1") == "10\n"
assert (
node1.query(
"SELECT COUNT() FROM system.parts WHERE table = 'test_table' and active=1"
)
== "10\n"
)
assert (
node2.query(
"SELECT COUNT() FROM system.parts WHERE table = 'test_table' and active=1"
)
== "10\n"
)
node1.query("SYSTEM START MERGES")
node1.query("OPTIMIZE TABLE test_table")
node2.query("SYSTEM SYNC REPLICA test_table")
node1_parts = node1.query("SELECT COUNT() FROM system.parts WHERE table = 'test_table' and active=1").strip()
node2_parts = node2.query("SELECT COUNT() FROM system.parts WHERE table = 'test_table' and active=1").strip()
node1_parts = node1.query(
"SELECT COUNT() FROM system.parts WHERE table = 'test_table' and active=1"
).strip()
node2_parts = node2.query(
"SELECT COUNT() FROM system.parts WHERE table = 'test_table' and active=1"
).strip()
assert int(node1_parts) < 10
assert int(node2_parts) < 10

View File

@ -5,17 +5,19 @@ import pytest
from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node = cluster.add_instance('node', main_configs=['configs/config.xml'])
node = cluster.add_instance("node", main_configs=["configs/config.xml"])
@pytest.fixture(scope='module')
@pytest.fixture(scope="module")
def started_cluster():
try:
cluster.start()
node.query("""
node.query(
"""
create table t (number UInt64)
engine = Distributed(test_cluster_two_shards, system, numbers)
""")
"""
)
yield cluster
finally:
@ -25,12 +27,15 @@ def started_cluster():
def test_filled_async_drain_connection_pool(started_cluster):
def execute_queries(_):
for _ in range(100):
node.query('select * from t where number = 0 limit 2', settings={
'sleep_in_receive_cancel_ms': int(10e6),
'max_execution_time': 5,
# decrease drain_timeout to make the test more stable
# (another way is to increase max_execution_time, but this will make the test slower)
'drain_timeout': 1,
})
node.query(
"select * from t where number = 0 limit 2",
settings={
"sleep_in_receive_cancel_ms": int(10e6),
"max_execution_time": 5,
# decrease drain_timeout to make the test more stable
# (another way is to increase max_execution_time, but this will make the test slower)
"drain_timeout": 1,
},
)
any(map(execute_queries, range(10)))

View File

@ -4,8 +4,11 @@ import pytest
from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', with_zookeeper=True,
main_configs=['configs/asynchronous_metrics_update_period_s.xml'])
node1 = cluster.add_instance(
"node1",
with_zookeeper=True,
main_configs=["configs/asynchronous_metrics_update_period_s.xml"],
)
@pytest.fixture(scope="module")
@ -27,20 +30,20 @@ def test_event_time_microseconds_field(started_cluster):
cluster.start()
node1.query("SET log_queries = 1;")
node1.query("CREATE DATABASE replica;")
query_create = '''CREATE TABLE replica.test
query_create = """CREATE TABLE replica.test
(
id Int64,
event_time DateTime
)
Engine=MergeTree()
PARTITION BY toYYYYMMDD(event_time)
ORDER BY id;'''
ORDER BY id;"""
time.sleep(2)
node1.query(query_create)
node1.query('''INSERT INTO replica.test VALUES (1, now())''')
node1.query("""INSERT INTO replica.test VALUES (1, now())""")
node1.query("SYSTEM FLUSH LOGS;")
# query assumes that the event_time field is accurate
equals_query = '''WITH (
equals_query = """WITH (
(
SELECT event_time_microseconds
FROM system.asynchronous_metric_log
@@ -53,7 +56,7 @@ def test_event_time_microseconds_field(started_cluster):
ORDER BY event_time DESC
LIMIT 1
) AS time)
SELECT if(dateDiff('second', toDateTime(time_with_microseconds), toDateTime(time)) = 0, 'ok', 'fail')'''
SELECT if(dateDiff('second', toDateTime(time_with_microseconds), toDateTime(time)) = 0, 'ok', 'fail')"""
assert "ok\n" in node1.query(equals_query)
finally:
cluster.shutdown()

View File

@@ -5,21 +5,29 @@ from helpers.cluster import ClickHouseCluster
from helpers.network import PartitionManager
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', main_configs=["configs/config.d/zookeeper_session_timeout.xml",
"configs/remote_servers.xml"], with_zookeeper=True)
node1 = cluster.add_instance(
"node1",
main_configs=[
"configs/config.d/zookeeper_session_timeout.xml",
"configs/remote_servers.xml",
],
with_zookeeper=True,
)
@pytest.fixture(scope="module")
def start_cluster():
try:
cluster.start()
node1.query("CREATE DATABASE zktest ENGINE=Ordinary;") # Different behaviour with Atomic
node1.query(
'''
"CREATE DATABASE zktest ENGINE=Ordinary;"
) # Different behaviour with Atomic
node1.query(
"""
CREATE TABLE zktest.atomic_drop_table (n UInt32)
ENGINE = ReplicatedMergeTree('/clickhouse/zktest/tables/atomic_drop_table', 'node1')
PARTITION BY n ORDER BY n
'''
"""
)
yield cluster
finally:
@@ -31,8 +39,10 @@ def test_atomic_delete_with_stopped_zookeeper(start_cluster):
with PartitionManager() as pm:
pm.drop_instance_zk_connections(node1)
error = node1.query_and_get_error("DROP TABLE zktest.atomic_drop_table") # Table won't drop
error = node1.query_and_get_error(
"DROP TABLE zktest.atomic_drop_table"
) # Table won't drop
assert error != ""
time.sleep(5)
assert '8192' in node1.query("select * from zktest.atomic_drop_table")
assert "8192" in node1.query("select * from zktest.atomic_drop_table")

View File

@@ -3,7 +3,9 @@ import pytest
from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node = cluster.add_instance('node', main_configs=["configs/config.xml"], with_zookeeper=True)
node = cluster.add_instance(
"node", main_configs=["configs/config.xml"], with_zookeeper=True
)
@pytest.fixture(scope="module")
@@ -14,18 +16,35 @@ def started_cluster():
finally:
cluster.shutdown()
def create_force_drop_flag(node):
force_drop_flag_path = "/var/lib/clickhouse/flags/force_drop_table"
node.exec_in_container(["bash", "-c", "touch {} && chmod a=rw {}".format(force_drop_flag_path, force_drop_flag_path)], user="root")
node.exec_in_container(
[
"bash",
"-c",
"touch {} && chmod a=rw {}".format(
force_drop_flag_path, force_drop_flag_path
),
],
user="root",
)
@pytest.mark.parametrize("engine", ['Ordinary', 'Atomic'])
@pytest.mark.parametrize("engine", ["Ordinary", "Atomic"])
def test_attach_partition_with_large_destination(started_cluster, engine):
# Initialize
node.query("CREATE DATABASE db ENGINE={}".format(engine))
node.query("CREATE TABLE db.destination (n UInt64) ENGINE=ReplicatedMergeTree('/test/destination', 'r1') ORDER BY n PARTITION BY n % 2")
node.query("CREATE TABLE db.source_1 (n UInt64) ENGINE=ReplicatedMergeTree('/test/source_1', 'r1') ORDER BY n PARTITION BY n % 2")
node.query(
"CREATE TABLE db.destination (n UInt64) ENGINE=ReplicatedMergeTree('/test/destination', 'r1') ORDER BY n PARTITION BY n % 2"
)
node.query(
"CREATE TABLE db.source_1 (n UInt64) ENGINE=ReplicatedMergeTree('/test/source_1', 'r1') ORDER BY n PARTITION BY n % 2"
)
node.query("INSERT INTO db.source_1 VALUES (1), (2), (3), (4)")
node.query("CREATE TABLE db.source_2 (n UInt64) ENGINE=ReplicatedMergeTree('/test/source_2', 'r1') ORDER BY n PARTITION BY n % 2")
node.query(
"CREATE TABLE db.source_2 (n UInt64) ENGINE=ReplicatedMergeTree('/test/source_2', 'r1') ORDER BY n PARTITION BY n % 2"
)
node.query("INSERT INTO db.source_2 VALUES (5), (6), (7), (8)")
# Attach partition when destination partition is empty
@@ -33,7 +52,9 @@ def test_attach_partition_with_large_destination(started_cluster, engine):
assert node.query("SELECT n FROM db.destination ORDER BY n") == "2\n4\n"
# REPLACE PARTITION should still respect max_partition_size_to_drop
assert node.query_and_get_error("ALTER TABLE db.destination REPLACE PARTITION 0 FROM db.source_2")
assert node.query_and_get_error(
"ALTER TABLE db.destination REPLACE PARTITION 0 FROM db.source_2"
)
assert node.query("SELECT n FROM db.destination ORDER BY n") == "2\n4\n"
# Attach partition when destination partition is larger than max_partition_size_to_drop
@@ -47,4 +68,4 @@ def test_attach_partition_with_large_destination(started_cluster, engine):
node.query("DROP TABLE db.source_2 SYNC")
create_force_drop_flag(node)
node.query("DROP TABLE db.destination SYNC")
node.query("DROP DATABASE db")
node.query("DROP DATABASE db")

View File

@@ -3,7 +3,8 @@ from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1')
node1 = cluster.add_instance("node1")
@pytest.fixture(scope="module")
def start_cluster():
@@ -17,9 +18,12 @@ def start_cluster():
def test_attach_without_checksums(start_cluster):
node1.query(
"CREATE TABLE test (date Date, key Int32, value String) Engine=MergeTree ORDER BY key PARTITION by date")
"CREATE TABLE test (date Date, key Int32, value String) Engine=MergeTree ORDER BY key PARTITION by date"
)
node1.query("INSERT INTO test SELECT toDate('2019-10-01'), number, toString(number) FROM numbers(100)")
node1.query(
"INSERT INTO test SELECT toDate('2019-10-01'), number, toString(number) FROM numbers(100)"
)
assert node1.query("SELECT COUNT() FROM test WHERE key % 10 == 0") == "10\n"
@@ -30,15 +34,27 @@ def test_attach_without_checksums(start_cluster):
# to be sure output not empty
node1.exec_in_container(
['bash', '-c', 'find /var/lib/clickhouse/data/default/test/detached -name "checksums.txt" | grep -e ".*" '],
privileged=True, user='root')
[
"bash",
"-c",
'find /var/lib/clickhouse/data/default/test/detached -name "checksums.txt" | grep -e ".*" ',
],
privileged=True,
user="root",
)
node1.exec_in_container(
['bash', '-c', 'find /var/lib/clickhouse/data/default/test/detached -name "checksums.txt" -delete'],
privileged=True, user='root')
[
"bash",
"-c",
'find /var/lib/clickhouse/data/default/test/detached -name "checksums.txt" -delete',
],
privileged=True,
user="root",
)
node1.query("ALTER TABLE test ATTACH PARTITION '2019-10-01'")
assert node1.query("SELECT COUNT() FROM test WHERE key % 10 == 0") == "10\n"
assert node1.query("SELECT COUNT() FROM test") == "100\n"
node1.query("DROP TABLE test")
node1.query("DROP TABLE test")

View File

@@ -7,19 +7,25 @@ from helpers.test_tools import assert_eq_with_retry
from helpers.network import PartitionManager
from helpers.corrupt_part_data_on_disk import corrupt_part_data_by_path
def fill_node(node):
node.query_with_retry(
'''
"""
CREATE TABLE IF NOT EXISTS test(n UInt32)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/test', '{replica}')
ORDER BY n PARTITION BY n % 10;
'''.format(replica=node.name))
""".format(
replica=node.name
)
)
cluster = ClickHouseCluster(__file__)
node_1 = cluster.add_instance('replica1', with_zookeeper=True)
node_2 = cluster.add_instance('replica2', with_zookeeper=True)
node_3 = cluster.add_instance('replica3', with_zookeeper=True)
node_1 = cluster.add_instance("replica1", with_zookeeper=True)
node_2 = cluster.add_instance("replica2", with_zookeeper=True)
node_3 = cluster.add_instance("replica3", with_zookeeper=True)
@pytest.fixture(scope="module")
def start_cluster():
@@ -36,27 +42,42 @@ def start_cluster():
finally:
cluster.shutdown()
def check_data(nodes, detached_parts):
for node in nodes:
print("> Replication queue for", node.name, "\n> table\treplica_name\tsource_replica\ttype\tposition\n",
node.query_with_retry("SELECT table, replica_name, source_replica, type, position FROM system.replication_queue"))
print(
"> Replication queue for",
node.name,
"\n> table\treplica_name\tsource_replica\ttype\tposition\n",
node.query_with_retry(
"SELECT table, replica_name, source_replica, type, position FROM system.replication_queue"
),
)
node.query_with_retry("SYSTEM SYNC REPLICA test")
print("> Checking data integrity for", node.name)
for i in range(10):
assert_eq_with_retry(node, "SELECT count() FROM test WHERE n % 10 == " + str(i),
"0\n" if i in detached_parts else "10\n")
assert_eq_with_retry(
node,
"SELECT count() FROM test WHERE n % 10 == " + str(i),
"0\n" if i in detached_parts else "10\n",
)
assert_eq_with_retry(node, "SELECT count() FROM system.parts WHERE table='test'",
str(10 - len(detached_parts)) + "\n")
assert_eq_with_retry(
node,
"SELECT count() FROM system.parts WHERE table='test'",
str(10 - len(detached_parts)) + "\n",
)
res: str = node.query("SELECT * FROM test ORDER BY n")
for other in nodes:
if other != node:
logging.debug(f"> Checking data consistency, {other.name} vs {node.name}")
logging.debug(
f"> Checking data consistency, {other.name} vs {node.name}"
)
assert_eq_with_retry(other, "SELECT * FROM test ORDER BY n", res)
@@ -83,7 +104,6 @@ def test_attach_without_fetching(start_cluster):
# files missing.
node_1.query("ALTER TABLE test DETACH PARTITION 2")
check_data([node_1, node_2], detached_parts=[0, 1, 2])
# 2. Create the third replica
@@ -94,14 +114,28 @@ def test_attach_without_fetching(start_cluster):
# Replica 2 should also download the data from 1 as the checksums won't match.
logging.debug("Checking attach with corrupted part data with files missing")
to_delete = node_2.exec_in_container(['bash', '-c',
'cd {p} && ls *.bin'.format(
p="/var/lib/clickhouse/data/default/test/detached/2_0_0_0")], privileged=True)
to_delete = node_2.exec_in_container(
[
"bash",
"-c",
"cd {p} && ls *.bin".format(
p="/var/lib/clickhouse/data/default/test/detached/2_0_0_0"
),
],
privileged=True,
)
logging.debug(f"Before deleting: {to_delete}")
node_2.exec_in_container(['bash', '-c',
'cd {p} && rm -fr *.bin'.format(
p="/var/lib/clickhouse/data/default/test/detached/2_0_0_0")], privileged=True)
node_2.exec_in_container(
[
"bash",
"-c",
"cd {p} && rm -fr *.bin".format(
p="/var/lib/clickhouse/data/default/test/detached/2_0_0_0"
),
],
privileged=True,
)
node_1.query("ALTER TABLE test ATTACH PARTITION 2")
check_data([node_1, node_2, node_3], detached_parts=[0, 1])
@@ -111,7 +145,9 @@ def test_attach_without_fetching(start_cluster):
# Replica 2 should also download the data from 1 as the checksums won't match.
print("Checking attach with corrupted part data with all of the files present")
corrupt_part_data_by_path(node_2, "/var/lib/clickhouse/data/default/test/detached/1_0_0_0")
corrupt_part_data_by_path(
node_2, "/var/lib/clickhouse/data/default/test/detached/1_0_0_0"
)
node_1.query("ALTER TABLE test ATTACH PARTITION 1")
check_data([node_1, node_2, node_3], detached_parts=[0])
@@ -123,8 +159,8 @@ def test_attach_without_fetching(start_cluster):
with PartitionManager() as pm:
# If something goes wrong and replica 2 wants to fetch data, the test will fail.
pm.partition_instances(node_2, node_1, action='REJECT --reject-with tcp-reset')
pm.partition_instances(node_1, node_3, action='REJECT --reject-with tcp-reset')
pm.partition_instances(node_2, node_1, action="REJECT --reject-with tcp-reset")
pm.partition_instances(node_1, node_3, action="REJECT --reject-with tcp-reset")
node_1.query("ALTER TABLE test ATTACH PART '0_0_0_0'")

View File

@@ -2,7 +2,7 @@ import pytest
from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
instance = cluster.add_instance('instance')
instance = cluster.add_instance("instance")
@pytest.fixture(scope="module", autouse=True)
@@ -20,18 +20,30 @@ def setup_nodes():
def test_authentication_pass():
assert instance.query("SELECT currentUser()", user='sasha') == 'sasha\n'
assert instance.query("SELECT currentUser()", user='masha', password='qwerty') == 'masha\n'
assert instance.query("SELECT currentUser()", user="sasha") == "sasha\n"
assert (
instance.query("SELECT currentUser()", user="masha", password="qwerty")
== "masha\n"
)
# 'no_password' authentication type allows to login with any password.
assert instance.query("SELECT currentUser()", user='sasha', password='something') == 'sasha\n'
assert instance.query("SELECT currentUser()", user='sasha', password='something2') == 'sasha\n'
assert (
instance.query("SELECT currentUser()", user="sasha", password="something")
== "sasha\n"
)
assert (
instance.query("SELECT currentUser()", user="sasha", password="something2")
== "sasha\n"
)
def test_authentication_fail():
# User doesn't exist.
assert "vasya: Authentication failed" in instance.query_and_get_error("SELECT currentUser()", user='vasya')
assert "vasya: Authentication failed" in instance.query_and_get_error(
"SELECT currentUser()", user="vasya"
)
# Wrong password.
assert "masha: Authentication failed" in instance.query_and_get_error("SELECT currentUser()", user='masha',
password='123')
assert "masha: Authentication failed" in instance.query_and_get_error(
"SELECT currentUser()", user="masha", password="123"
)

View File

@@ -17,12 +17,20 @@ CLUSTER_NAME = "test_cluster"
def cluster():
try:
cluster = ClickHouseCluster(__file__)
cluster.add_instance(NODE1, main_configs=["configs/config.d/storage_conf.xml"], macros={'replica': '1'},
with_azurite=True,
with_zookeeper=True)
cluster.add_instance(NODE2, main_configs=["configs/config.d/storage_conf.xml"], macros={'replica': '2'},
with_azurite=True,
with_zookeeper=True)
cluster.add_instance(
NODE1,
main_configs=["configs/config.d/storage_conf.xml"],
macros={"replica": "1"},
with_azurite=True,
with_zookeeper=True,
)
cluster.add_instance(
NODE2,
main_configs=["configs/config.d/storage_conf.xml"],
macros={"replica": "2"},
with_azurite=True,
with_zookeeper=True,
)
logging.info("Starting cluster...")
cluster.start()
logging.info("Cluster started")
@@ -53,7 +61,10 @@ def create_table(node, table_name, replica, **additional_settings):
def get_large_objects_count(blob_container_client, large_size_threshold=100):
return sum(blob['size'] > large_size_threshold for blob in blob_container_client.list_blobs())
return sum(
blob["size"] > large_size_threshold
for blob in blob_container_client.list_blobs()
)
def test_zero_copy_replication(cluster):
@@ -61,15 +72,21 @@ def test_zero_copy_replication(cluster):
node2 = cluster.instances[NODE2]
create_table(node1, TABLE_NAME, 1)
blob_container_client = cluster.blob_service_client.get_container_client(CONTAINER_NAME)
blob_container_client = cluster.blob_service_client.get_container_client(
CONTAINER_NAME
)
values1 = "(0,'data'),(1,'data')"
values2 = "(2,'data'),(3,'data')"
node1.query(f"INSERT INTO {TABLE_NAME} VALUES {values1}")
node2.query(f"SYSTEM SYNC REPLICA {TABLE_NAME}")
assert node1.query(f"SELECT * FROM {TABLE_NAME} order by id FORMAT Values") == values1
assert node2.query(f"SELECT * FROM {TABLE_NAME} order by id FORMAT Values") == values1
assert (
node1.query(f"SELECT * FROM {TABLE_NAME} order by id FORMAT Values") == values1
)
assert (
node2.query(f"SELECT * FROM {TABLE_NAME} order by id FORMAT Values") == values1
)
# Based on version 21.x - should be only one file with size 100+ (checksums.txt), used by both nodes
assert get_large_objects_count(blob_container_client) == 1
@@ -77,7 +94,13 @@ def test_zero_copy_replication(cluster):
node2.query(f"INSERT INTO {TABLE_NAME} VALUES {values2}")
node1.query(f"SYSTEM SYNC REPLICA {TABLE_NAME}")
assert node2.query(f"SELECT * FROM {TABLE_NAME} order by id FORMAT Values") == values1 + "," + values2
assert node1.query(f"SELECT * FROM {TABLE_NAME} order by id FORMAT Values") == values1 + "," + values2
assert (
node2.query(f"SELECT * FROM {TABLE_NAME} order by id FORMAT Values")
== values1 + "," + values2
)
assert (
node1.query(f"SELECT * FROM {TABLE_NAME} order by id FORMAT Values")
== values1 + "," + values2
)
assert get_large_objects_count(blob_container_client) == 2

View File

@@ -6,25 +6,35 @@ from helpers.cluster import ClickHouseCluster
from helpers.test_tools import TSV
cluster = ClickHouseCluster(__file__)
instance = cluster.add_instance('node')
path_to_data = '/var/lib/clickhouse/'
instance = cluster.add_instance("node")
path_to_data = "/var/lib/clickhouse/"
@pytest.fixture(scope="module")
def started_cluster():
try:
cluster.start()
instance.query('CREATE DATABASE test ENGINE = Ordinary') # Different path in shadow/ with Atomic
instance.query(
"CREATE DATABASE test ENGINE = Ordinary"
) # Different path in shadow/ with Atomic
instance.query("DROP TABLE IF EXISTS test.tbl")
instance.query("CREATE TABLE test.tbl (p Date, k Int8) ENGINE = MergeTree PARTITION BY toYYYYMM(p) ORDER BY p")
instance.query(
"CREATE TABLE test.tbl (p Date, k Int8) ENGINE = MergeTree PARTITION BY toYYYYMM(p) ORDER BY p"
)
for i in range(1, 4):
instance.query('INSERT INTO test.tbl (p, k) VALUES(toDate({}), {})'.format(i, i))
instance.query(
"INSERT INTO test.tbl (p, k) VALUES(toDate({}), {})".format(i, i)
)
for i in range(31, 34):
instance.query('INSERT INTO test.tbl (p, k) VALUES(toDate({}), {})'.format(i, i))
instance.query(
"INSERT INTO test.tbl (p, k) VALUES(toDate({}), {})".format(i, i)
)
expected = TSV('1970-01-02\t1\n1970-01-03\t2\n1970-01-04\t3\n1970-02-01\t31\n1970-02-02\t32\n1970-02-03\t33')
expected = TSV(
"1970-01-02\t1\n1970-01-03\t2\n1970-01-04\t3\n1970-02-01\t31\n1970-02-02\t32\n1970-02-03\t33"
)
res = instance.query("SELECT * FROM test.tbl ORDER BY p")
assert (TSV(res) == expected)
assert TSV(res) == expected
instance.query("ALTER TABLE test.tbl FREEZE")
@@ -33,21 +43,24 @@ def started_cluster():
finally:
cluster.shutdown()
def get_last_backup_path(instance, database, table):
fp_increment = os.path.join(path_to_data, 'shadow/increment.txt')
increment = instance.exec_in_container(['cat', fp_increment]).strip()
return os.path.join(path_to_data, 'shadow', increment, 'data', database, table)
fp_increment = os.path.join(path_to_data, "shadow/increment.txt")
increment = instance.exec_in_container(["cat", fp_increment]).strip()
return os.path.join(path_to_data, "shadow", increment, "data", database, table)
def copy_backup_to_detached(instance, database, src_table, dst_table):
fp_backup = os.path.join(path_to_data, 'shadow', '*', 'data', database, src_table)
fp_detached = os.path.join(path_to_data, 'data', database, dst_table, 'detached')
logging.debug(f'copy from {fp_backup} to {fp_detached}')
instance.exec_in_container(['bash', '-c', f'cp -r {fp_backup} -T {fp_detached}'])
fp_backup = os.path.join(path_to_data, "shadow", "*", "data", database, src_table)
fp_detached = os.path.join(path_to_data, "data", database, dst_table, "detached")
logging.debug(f"copy from {fp_backup} to {fp_detached}")
instance.exec_in_container(["bash", "-c", f"cp -r {fp_backup} -T {fp_detached}"])
def test_restore(started_cluster):
instance.query("CREATE TABLE test.tbl1 AS test.tbl")
copy_backup_to_detached(started_cluster.instances['node'], 'test', 'tbl', 'tbl1')
copy_backup_to_detached(started_cluster.instances["node"], "test", "tbl", "tbl1")
# The data_version of parts to be attached are larger than the newly created table's data_version.
instance.query("ALTER TABLE test.tbl1 ATTACH PARTITION 197001")
@@ -55,17 +68,21 @@ def test_restore(started_cluster):
instance.query("SELECT sleep(2)")
# Validate the attached parts are identical to the backup.
expected = TSV('1970-01-02\t1\n1970-01-03\t2\n1970-01-04\t3\n1970-02-01\t31\n1970-02-02\t32\n1970-02-03\t33')
expected = TSV(
"1970-01-02\t1\n1970-01-03\t2\n1970-01-04\t3\n1970-02-01\t31\n1970-02-02\t32\n1970-02-03\t33"
)
res = instance.query("SELECT * FROM test.tbl1 ORDER BY p")
assert (TSV(res) == expected)
assert TSV(res) == expected
instance.query("ALTER TABLE test.tbl1 UPDATE k=10 WHERE 1")
instance.query("SELECT sleep(2)")
# Validate mutation has been applied to all attached parts.
expected = TSV('1970-01-02\t10\n1970-01-03\t10\n1970-01-04\t10\n1970-02-01\t10\n1970-02-02\t10\n1970-02-03\t10')
expected = TSV(
"1970-01-02\t10\n1970-01-03\t10\n1970-01-04\t10\n1970-02-01\t10\n1970-02-02\t10\n1970-02-03\t10"
)
res = instance.query("SELECT * FROM test.tbl1 ORDER BY p")
assert (TSV(res) == expected)
assert TSV(res) == expected
instance.query("DROP TABLE IF EXISTS test.tbl1")
@@ -73,15 +90,19 @@ def test_restore(started_cluster):
def test_attach_partition(started_cluster):
instance.query("CREATE TABLE test.tbl2 AS test.tbl")
for i in range(3, 5):
instance.query('INSERT INTO test.tbl2(p, k) VALUES(toDate({}), {})'.format(i, i))
instance.query(
"INSERT INTO test.tbl2(p, k) VALUES(toDate({}), {})".format(i, i)
)
for i in range(33, 35):
instance.query('INSERT INTO test.tbl2(p, k) VALUES(toDate({}), {})'.format(i, i))
instance.query(
"INSERT INTO test.tbl2(p, k) VALUES(toDate({}), {})".format(i, i)
)
expected = TSV('1970-01-04\t3\n1970-01-05\t4\n1970-02-03\t33\n1970-02-04\t34')
expected = TSV("1970-01-04\t3\n1970-01-05\t4\n1970-02-03\t33\n1970-02-04\t34")
res = instance.query("SELECT * FROM test.tbl2 ORDER BY p")
assert (TSV(res) == expected)
assert TSV(res) == expected
copy_backup_to_detached(started_cluster.instances['node'], 'test', 'tbl', 'tbl2')
copy_backup_to_detached(started_cluster.instances["node"], "test", "tbl", "tbl2")
# The data_version of parts to be attached
# - may be less than, equal to or larger than the current table's data_version.
@@ -91,18 +112,20 @@ def test_attach_partition(started_cluster):
instance.query("SELECT sleep(2)")
expected = TSV(
'1970-01-02\t1\n1970-01-03\t2\n1970-01-04\t3\n1970-01-04\t3\n1970-01-05\t4\n1970-02-01\t31\n1970-02-02\t32\n1970-02-03\t33\n1970-02-03\t33\n1970-02-04\t34')
"1970-01-02\t1\n1970-01-03\t2\n1970-01-04\t3\n1970-01-04\t3\n1970-01-05\t4\n1970-02-01\t31\n1970-02-02\t32\n1970-02-03\t33\n1970-02-03\t33\n1970-02-04\t34"
)
res = instance.query("SELECT * FROM test.tbl2 ORDER BY p")
assert (TSV(res) == expected)
assert TSV(res) == expected
instance.query("ALTER TABLE test.tbl2 UPDATE k=10 WHERE 1")
instance.query("SELECT sleep(2)")
# Validate mutation has been applied to all attached parts.
expected = TSV(
'1970-01-02\t10\n1970-01-03\t10\n1970-01-04\t10\n1970-01-04\t10\n1970-01-05\t10\n1970-02-01\t10\n1970-02-02\t10\n1970-02-03\t10\n1970-02-03\t10\n1970-02-04\t10')
"1970-01-02\t10\n1970-01-03\t10\n1970-01-04\t10\n1970-01-04\t10\n1970-01-05\t10\n1970-02-01\t10\n1970-02-02\t10\n1970-02-03\t10\n1970-02-03\t10\n1970-02-04\t10"
)
res = instance.query("SELECT * FROM test.tbl2 ORDER BY p")
assert (TSV(res) == expected)
assert TSV(res) == expected
instance.query("DROP TABLE IF EXISTS test.tbl2")
@@ -110,15 +133,19 @@ def test_replace_partition(started_cluster):
def test_replace_partition(started_cluster):
instance.query("CREATE TABLE test.tbl3 AS test.tbl")
for i in range(3, 5):
instance.query('INSERT INTO test.tbl3(p, k) VALUES(toDate({}), {})'.format(i, i))
instance.query(
"INSERT INTO test.tbl3(p, k) VALUES(toDate({}), {})".format(i, i)
)
for i in range(33, 35):
instance.query('INSERT INTO test.tbl3(p, k) VALUES(toDate({}), {})'.format(i, i))
instance.query(
"INSERT INTO test.tbl3(p, k) VALUES(toDate({}), {})".format(i, i)
)
expected = TSV('1970-01-04\t3\n1970-01-05\t4\n1970-02-03\t33\n1970-02-04\t34')
expected = TSV("1970-01-04\t3\n1970-01-05\t4\n1970-02-03\t33\n1970-02-04\t34")
res = instance.query("SELECT * FROM test.tbl3 ORDER BY p")
assert (TSV(res) == expected)
assert TSV(res) == expected
copy_backup_to_detached(started_cluster.instances['node'], 'test', 'tbl', 'tbl3')
copy_backup_to_detached(started_cluster.instances["node"], "test", "tbl", "tbl3")
# The data_version of parts to be copied
# - may be less than, equal to or larger than the current table data_version.
@@ -126,35 +153,56 @@ def test_replace_partition(started_cluster):
instance.query("ALTER TABLE test.tbl3 REPLACE PARTITION 197002 FROM test.tbl")
instance.query("SELECT sleep(2)")
expected = TSV('1970-01-04\t3\n1970-01-05\t4\n1970-02-01\t31\n1970-02-02\t32\n1970-02-03\t33')
expected = TSV(
"1970-01-04\t3\n1970-01-05\t4\n1970-02-01\t31\n1970-02-02\t32\n1970-02-03\t33"
)
res = instance.query("SELECT * FROM test.tbl3 ORDER BY p")
assert (TSV(res) == expected)
assert TSV(res) == expected
instance.query("ALTER TABLE test.tbl3 UPDATE k=10 WHERE 1")
instance.query("SELECT sleep(2)")
# Validate mutation has been applied to all copied parts.
expected = TSV('1970-01-04\t10\n1970-01-05\t10\n1970-02-01\t10\n1970-02-02\t10\n1970-02-03\t10')
expected = TSV(
"1970-01-04\t10\n1970-01-05\t10\n1970-02-01\t10\n1970-02-02\t10\n1970-02-03\t10"
)
res = instance.query("SELECT * FROM test.tbl3 ORDER BY p")
assert (TSV(res) == expected)
assert TSV(res) == expected
instance.query("DROP TABLE IF EXISTS test.tbl3")
def test_freeze_in_memory(started_cluster):
instance.query("CREATE TABLE test.t_in_memory(a UInt32, s String) ENGINE = MergeTree ORDER BY a SETTINGS min_rows_for_compact_part = 1000")
instance.query(
"CREATE TABLE test.t_in_memory(a UInt32, s String) ENGINE = MergeTree ORDER BY a SETTINGS min_rows_for_compact_part = 1000"
)
instance.query("INSERT INTO test.t_in_memory VALUES (1, 'a')")
instance.query("ALTER TABLE test.t_in_memory FREEZE")
fp_backup = get_last_backup_path(started_cluster.instances['node'], 'test', 't_in_memory')
part_path = fp_backup + '/all_1_1_0/'
fp_backup = get_last_backup_path(
started_cluster.instances["node"], "test", "t_in_memory"
)
part_path = fp_backup + "/all_1_1_0/"
assert TSV(instance.query("SELECT part_type, is_frozen FROM system.parts WHERE database = 'test' AND table = 't_in_memory'")) == TSV("InMemory\t1\n")
instance.exec_in_container(['test', '-f', part_path + '/data.bin'])
assert instance.exec_in_container(['cat', part_path + '/count.txt']).strip() == '1'
assert TSV(
instance.query(
"SELECT part_type, is_frozen FROM system.parts WHERE database = 'test' AND table = 't_in_memory'"
)
) == TSV("InMemory\t1\n")
instance.exec_in_container(["test", "-f", part_path + "/data.bin"])
assert instance.exec_in_container(["cat", part_path + "/count.txt"]).strip() == "1"
instance.query("CREATE TABLE test.t_in_memory_2(a UInt32, s String) ENGINE = MergeTree ORDER BY a")
copy_backup_to_detached(started_cluster.instances['node'], 'test', 't_in_memory', 't_in_memory_2')
instance.query(
"CREATE TABLE test.t_in_memory_2(a UInt32, s String) ENGINE = MergeTree ORDER BY a"
)
copy_backup_to_detached(
started_cluster.instances["node"], "test", "t_in_memory", "t_in_memory_2"
)
instance.query("ALTER TABLE test.t_in_memory_2 ATTACH PARTITION ID 'all'")
assert TSV(instance.query("SELECT part_type FROM system.parts WHERE database = 'test' AND table = 't_in_memory_2'")) == TSV("Compact\n")
assert TSV(
instance.query(
"SELECT part_type FROM system.parts WHERE database = 'test' AND table = 't_in_memory_2'"
)
) == TSV("Compact\n")
assert TSV(instance.query("SELECT a, s FROM test.t_in_memory_2")) == TSV("1\ta\n")

Some files were not shown because too many files have changed in this diff.
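
The hunks above are purely mechanical reformattings produced by black. As a minimal sketch of how the same check can be reproduced locally, assuming black is installed (pip install black) and run from the repository root; the target directories and helper name below are illustrative, not part of this commit:

#!/usr/bin/env python3
# Minimal sketch, not part of this commit: verify black formatting locally.
# Assumes `black` is installed; TARGETS is illustrative and should be adjusted
# to the directories of your checkout.
import subprocess
import sys

TARGETS = ["tests/integration", "tests/ci"]


def black_is_clean(paths):
    # `black --check --diff` exits with 0 when nothing would change and with 1
    # when some files would be reformatted (the diff is printed to stdout).
    result = subprocess.run(
        [sys.executable, "-m", "black", "--check", "--diff", *paths],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        print(result.stdout)
    return result.returncode == 0


if __name__ == "__main__":
    sys.exit(0 if black_is_clean(TARGETS) else 1)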