Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-24 16:42:05 +00:00)

Merge pull request #35466 from ClickHouse/black
Check python black formatting
Commit: 71fb04ea4a
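The diff below reformats several of the repository's Python helper scripts with black. As a rough, hypothetical sketch of how such a formatting gate can be enforced (this is not necessarily how the ClickHouse CI job invokes it, and the check_black_formatting helper name is made up for illustration), black can be run in check-only mode and the build failed on a non-zero exit code:

import subprocess
import sys


def check_black_formatting(path="."):
    # "--check" makes black report files it would reformat (exit code 1)
    # instead of rewriting them; "--diff" prints the proposed changes.
    result = subprocess.run(
        ["black", "--check", "--diff", path],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        print(result.stdout)
        print("Python files are not black-formatted", file=sys.stderr)
    return result.returncode == 0


if __name__ == "__main__":
    sys.exit(0 if check_black_formatting() else 1)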
@@ -4,11 +4,12 @@
import sys
import json


def parse_block(block=[], options=[]):

    #print('block is here', block)
    #show_query = False
    #show_query = options.show_query
    # print('block is here', block)
    # show_query = False
    # show_query = options.show_query
    result = []
    query = block[0].strip()
    if len(block) > 4:
@@ -20,9 +21,9 @@ def parse_block(block=[], options=[]):
        timing2 = block[2].strip().split()[1]
        timing3 = block[3].strip().split()[1]
        if options.show_queries:
            result.append( query )
            result.append(query)
        if not options.show_first_timings:
            result += [ timing1 , timing2, timing3 ]
            result += [timing1, timing2, timing3]
        else:
            result.append(timing1)
    return result
@@ -37,12 +38,12 @@ def read_stats_file(options, fname):

    for line in f.readlines():

        if 'SELECT' in line:
        if "SELECT" in line:
            if len(block) > 1:
                result.append( parse_block(block, options) )
            block = [ line ]
        elif 'Time:' in line:
            block.append( line )
                result.append(parse_block(block, options))
            block = [line]
        elif "Time:" in line:
            block.append(line)

    return result

@@ -50,7 +51,7 @@ def read_stats_file(options, fname):
def compare_stats_files(options, arguments):
    result = []
    file_output = []
    pyplot_colors = ['y', 'b', 'g', 'r']
    pyplot_colors = ["y", "b", "g", "r"]
    for fname in arguments[1:]:
        file_output.append((read_stats_file(options, fname)))
    if len(file_output[0]) > 0:
@@ -58,65 +59,92 @@ def compare_stats_files(options, arguments):
        for idx, data_set in enumerate(file_output):
            int_result = []
            for timing in data_set:
                int_result.append(float(timing[0])) #y values
            result.append([[x for x in range(0, len(int_result)) ], int_result,
                pyplot_colors[idx] + '^' ] )
            # result.append([x for x in range(1, len(int_result)) ]) #x values
            # result.append( pyplot_colors[idx] + '^' )
                int_result.append(float(timing[0])) # y values
            result.append(
                [
                    [x for x in range(0, len(int_result))],
                    int_result,
                    pyplot_colors[idx] + "^",
                ]
            )
            # result.append([x for x in range(1, len(int_result)) ]) #x values
            # result.append( pyplot_colors[idx] + '^' )

    return result


def parse_args():
    from optparse import OptionParser
    parser = OptionParser(usage='usage: %prog [options] [result_file_path]..')
    parser.add_option("-q", "--show-queries", help="Show statements along with timings", action="store_true", dest="show_queries")
    parser.add_option("-f", "--show-first-timings", help="Show only first tries timings", action="store_true", dest="show_first_timings")
    parser.add_option("-c", "--compare-mode", help="Prepare output for pyplot comparing result files.", action="store", dest="compare_mode")

    parser = OptionParser(usage="usage: %prog [options] [result_file_path]..")
    parser.add_option(
        "-q",
        "--show-queries",
        help="Show statements along with timings",
        action="store_true",
        dest="show_queries",
    )
    parser.add_option(
        "-f",
        "--show-first-timings",
        help="Show only first tries timings",
        action="store_true",
        dest="show_first_timings",
    )
    parser.add_option(
        "-c",
        "--compare-mode",
        help="Prepare output for pyplot comparing result files.",
        action="store",
        dest="compare_mode",
    )
    (options, arguments) = parser.parse_args(sys.argv)
    if len(arguments) < 2:
        parser.print_usage()
        sys.exit(1)
    return ( options, arguments )
    return (options, arguments)


def gen_pyplot_code(options, arguments):
    result = ''
    result = ""
    data_sets = compare_stats_files(options, arguments)
    for idx, data_set in enumerate(data_sets, start=0):
        x_values, y_values, line_style = data_set
        result += '\nplt.plot('
        result += '%s, %s, \'%s\'' % ( x_values, y_values, line_style )
        result += ', label=\'%s try\')' % idx
    print('import matplotlib.pyplot as plt')
        result += "\nplt.plot("
        result += "%s, %s, '%s'" % (x_values, y_values, line_style)
        result += ", label='%s try')" % idx
    print("import matplotlib.pyplot as plt")
    print(result)
    print( 'plt.xlabel(\'Try number\')' )
    print( 'plt.ylabel(\'Timing\')' )
    print( 'plt.title(\'Benchmark query timings\')' )
    print('plt.legend()')
    print('plt.show()')
    print("plt.xlabel('Try number')")
    print("plt.ylabel('Timing')")
    print("plt.title('Benchmark query timings')")
    print("plt.legend()")
    print("plt.show()")


def gen_html_json(options, arguments):
    tuples = read_stats_file(options, arguments[1])
    print('{')
    print("{")
    print('"system: GreenPlum(x2),')
    print(('"version": "%s",' % '4.3.9.1'))
    print(('"version": "%s",' % "4.3.9.1"))
    print('"data_size": 10000000,')
    print('"time": "",')
    print('"comments": "",')
    print('"result":')
    print('[')
    print("[")
    for s in tuples:
        print(s)
    print(']')
    print('}')
    print("]")
    print("}")


def main():
    ( options, arguments ) = parse_args()
    (options, arguments) = parse_args()
    if len(arguments) > 2:
        gen_pyplot_code(options, arguments)
    else:
        gen_html_json(options, arguments)

if __name__ == '__main__':

if __name__ == "__main__":
    main()
@@ -11,7 +11,7 @@ def removesuffix(text, suffix):
    https://www.python.org/dev/peps/pep-0616/
    """
    if suffix and text.endswith(suffix):
        return text[:-len(suffix)]
        return text[: -len(suffix)]
    else:
        return text[:]
@@ -3,55 +3,55 @@ import subprocess
import datetime
from flask import Flask, flash, request, redirect, url_for


def run_command(command, wait=False):
    print("{} - execute shell command:{}".format(datetime.datetime.now(), command))
    lines = []
    p = subprocess.Popen(command,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT,
                         shell=True)
    p = subprocess.Popen(
        command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True
    )
    if wait:
        for l in iter(p.stdout.readline, b''):
        for l in iter(p.stdout.readline, b""):
            lines.append(l)
        p.poll()
        return (lines, p.returncode)
    else:
        return(iter(p.stdout.readline, b''), 0)
        return (iter(p.stdout.readline, b""), 0)


UPLOAD_FOLDER = './'
ALLOWED_EXTENSIONS = {'txt', 'sh'}
UPLOAD_FOLDER = "./"
ALLOWED_EXTENSIONS = {"txt", "sh"}
app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config["UPLOAD_FOLDER"] = UPLOAD_FOLDER

@app.route('/')

@app.route("/")
def hello_world():
    return 'Hello World'
    return "Hello World"


def allowed_file(filename):
    return '.' in filename and \
           filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
    return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS


@app.route('/upload', methods=['GET', 'POST'])
@app.route("/upload", methods=["GET", "POST"])
def upload_file():
    if request.method == 'POST':
    if request.method == "POST":
        # check if the post request has the file part
        if 'file' not in request.files:
            flash('No file part')
        if "file" not in request.files:
            flash("No file part")
            return redirect(request.url)
        file = request.files['file']
        file = request.files["file"]
        # If the user does not select a file, the browser submits an
        # empty file without a filename.
        if file.filename == '':
            flash('No selected file')
        if file.filename == "":
            flash("No selected file")
            return redirect(request.url)
        if file and allowed_file(file.filename):
            filename = file.filename
            file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
            return redirect(url_for('upload_file', name=filename))
    return '''
            file.save(os.path.join(app.config["UPLOAD_FOLDER"], filename))
            return redirect(url_for("upload_file", name=filename))
    return """
    <!doctype html>
    <title>Upload new File</title>
    <h1>Upload new File</h1>
@@ -59,12 +59,15 @@ def upload_file():
    <input type=file name=file>
    <input type=submit value=Upload>
    </form>
    '''
@app.route('/run', methods=['GET', 'POST'])
    """


@app.route("/run", methods=["GET", "POST"])
def parse_request():
    data = request.data # data is empty
    run_command(data, wait=True)
    return 'Ok'
    return "Ok"

if __name__ == '__main__':
    app.run(port=5011)

if __name__ == "__main__":
    app.run(port=5011)
@ -19,58 +19,126 @@ import xml.etree.ElementTree as et
|
||||
from threading import Thread
|
||||
from scipy import stats
|
||||
|
||||
logging.basicConfig(format='%(asctime)s: %(levelname)s: %(module)s: %(message)s', level='WARNING')
|
||||
logging.basicConfig(
|
||||
format="%(asctime)s: %(levelname)s: %(module)s: %(message)s", level="WARNING"
|
||||
)
|
||||
|
||||
total_start_seconds = time.perf_counter()
|
||||
stage_start_seconds = total_start_seconds
|
||||
|
||||
|
||||
def reportStageEnd(stage):
|
||||
global stage_start_seconds, total_start_seconds
|
||||
|
||||
current = time.perf_counter()
|
||||
print(f'stage\t{stage}\t{current - stage_start_seconds:.3f}\t{current - total_start_seconds:.3f}')
|
||||
print(
|
||||
f"stage\t{stage}\t{current - stage_start_seconds:.3f}\t{current - total_start_seconds:.3f}"
|
||||
)
|
||||
stage_start_seconds = current
|
||||
|
||||
|
||||
def tsv_escape(s):
|
||||
return s.replace('\\', '\\\\').replace('\t', '\\t').replace('\n', '\\n').replace('\r','')
|
||||
return (
|
||||
s.replace("\\", "\\\\")
|
||||
.replace("\t", "\\t")
|
||||
.replace("\n", "\\n")
|
||||
.replace("\r", "")
|
||||
)
|
||||
|
||||
|
||||
parser = argparse.ArgumentParser(description='Run performance test.')
|
||||
parser = argparse.ArgumentParser(description="Run performance test.")
|
||||
# Explicitly decode files as UTF-8 because sometimes we have Russian characters in queries, and LANG=C is set.
|
||||
parser.add_argument('file', metavar='FILE', type=argparse.FileType('r', encoding='utf-8'), nargs=1, help='test description file')
|
||||
parser.add_argument('--host', nargs='*', default=['localhost'], help="Space-separated list of server hostname(s). Corresponds to '--port' options.")
|
||||
parser.add_argument('--port', nargs='*', default=[9000], help="Space-separated list of server port(s). Corresponds to '--host' options.")
|
||||
parser.add_argument('--runs', type=int, default=1, help='Number of query runs per server.')
|
||||
parser.add_argument('--max-queries', type=int, default=None, help='Test no more than this number of queries, chosen at random.')
|
||||
parser.add_argument('--queries-to-run', nargs='*', type=int, default=None, help='Space-separated list of indexes of queries to test.')
|
||||
parser.add_argument('--max-query-seconds', type=int, default=15, help='For how many seconds at most a query is allowed to run. The script finishes with error if this time is exceeded.')
|
||||
parser.add_argument('--prewarm-max-query-seconds', type=int, default=180, help='For how many seconds at most a prewarm (cold storage) query is allowed to run. The script finishes with error if this time is exceeded.')
|
||||
parser.add_argument('--profile-seconds', type=int, default=0, help='For how many seconds to profile a query for which the performance has changed.')
|
||||
parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.')
|
||||
parser.add_argument('--print-queries', action='store_true', help='Print test queries and exit.')
|
||||
parser.add_argument('--print-settings', action='store_true', help='Print test settings and exit.')
|
||||
parser.add_argument('--keep-created-tables', action='store_true', help="Don't drop the created tables after the test.")
|
||||
parser.add_argument('--use-existing-tables', action='store_true', help="Don't create or drop the tables, use the existing ones instead.")
|
||||
parser.add_argument(
|
||||
"file",
|
||||
metavar="FILE",
|
||||
type=argparse.FileType("r", encoding="utf-8"),
|
||||
nargs=1,
|
||||
help="test description file",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--host",
|
||||
nargs="*",
|
||||
default=["localhost"],
|
||||
help="Space-separated list of server hostname(s). Corresponds to '--port' options.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--port",
|
||||
nargs="*",
|
||||
default=[9000],
|
||||
help="Space-separated list of server port(s). Corresponds to '--host' options.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--runs", type=int, default=1, help="Number of query runs per server."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--max-queries",
|
||||
type=int,
|
||||
default=None,
|
||||
help="Test no more than this number of queries, chosen at random.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--queries-to-run",
|
||||
nargs="*",
|
||||
type=int,
|
||||
default=None,
|
||||
help="Space-separated list of indexes of queries to test.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--max-query-seconds",
|
||||
type=int,
|
||||
default=15,
|
||||
help="For how many seconds at most a query is allowed to run. The script finishes with error if this time is exceeded.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--prewarm-max-query-seconds",
|
||||
type=int,
|
||||
default=180,
|
||||
help="For how many seconds at most a prewarm (cold storage) query is allowed to run. The script finishes with error if this time is exceeded.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--profile-seconds",
|
||||
type=int,
|
||||
default=0,
|
||||
help="For how many seconds to profile a query for which the performance has changed.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--long", action="store_true", help="Do not skip the tests tagged as long."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--print-queries", action="store_true", help="Print test queries and exit."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--print-settings", action="store_true", help="Print test settings and exit."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--keep-created-tables",
|
||||
action="store_true",
|
||||
help="Don't drop the created tables after the test.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--use-existing-tables",
|
||||
action="store_true",
|
||||
help="Don't create or drop the tables, use the existing ones instead.",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
reportStageEnd('start')
|
||||
reportStageEnd("start")
|
||||
|
||||
test_name = os.path.splitext(os.path.basename(args.file[0].name))[0]
|
||||
|
||||
tree = et.parse(args.file[0])
|
||||
root = tree.getroot()
|
||||
|
||||
reportStageEnd('parse')
|
||||
reportStageEnd("parse")
|
||||
|
||||
# Process query parameters
|
||||
subst_elems = root.findall('substitutions/substitution')
|
||||
available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... }
|
||||
subst_elems = root.findall("substitutions/substitution")
|
||||
available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... }
|
||||
for e in subst_elems:
|
||||
name = e.find('name').text
|
||||
values = [v.text for v in e.findall('values/value')]
|
||||
name = e.find("name").text
|
||||
values = [v.text for v in e.findall("values/value")]
|
||||
if not values:
|
||||
raise Exception(f'No values given for substitution {{{name}}}')
|
||||
raise Exception(f"No values given for substitution {{{name}}}")
|
||||
|
||||
available_parameters[name] = values
|
||||
|
||||
@ -78,7 +146,7 @@ for e in subst_elems:
|
||||
# parameters. The set of parameters is determined based on the first list.
|
||||
# Note: keep the order of queries -- sometimes we have DROP IF EXISTS
|
||||
# followed by CREATE in create queries section, so the order matters.
|
||||
def substitute_parameters(query_templates, other_templates = []):
|
||||
def substitute_parameters(query_templates, other_templates=[]):
|
||||
query_results = []
|
||||
other_results = [[]] * (len(other_templates))
|
||||
for i, q in enumerate(query_templates):
|
||||
@ -103,17 +171,21 @@ def substitute_parameters(query_templates, other_templates = []):
|
||||
# and reporting the queries marked as short.
|
||||
test_queries = []
|
||||
is_short = []
|
||||
for e in root.findall('query'):
|
||||
new_queries, [new_is_short] = substitute_parameters([e.text], [[e.attrib.get('short', '0')]])
|
||||
for e in root.findall("query"):
|
||||
new_queries, [new_is_short] = substitute_parameters(
|
||||
[e.text], [[e.attrib.get("short", "0")]]
|
||||
)
|
||||
test_queries += new_queries
|
||||
is_short += [eval(s) for s in new_is_short]
|
||||
|
||||
assert(len(test_queries) == len(is_short))
|
||||
assert len(test_queries) == len(is_short)
|
||||
|
||||
# If we're given a list of queries to run, check that it makes sense.
|
||||
for i in args.queries_to_run or []:
|
||||
if i < 0 or i >= len(test_queries):
|
||||
print(f'There is no query no. {i} in this test, only [{0}-{len(test_queries) - 1}] are present')
|
||||
print(
|
||||
f"There is no query no. {i} in this test, only [{0}-{len(test_queries) - 1}] are present"
|
||||
)
|
||||
exit(1)
|
||||
|
||||
# If we're only asked to print the queries, do that and exit.
|
||||
@ -125,60 +197,65 @@ if args.print_queries:
|
||||
# Print short queries
|
||||
for i, s in enumerate(is_short):
|
||||
if s:
|
||||
print(f'short\t{i}')
|
||||
print(f"short\t{i}")
|
||||
|
||||
# If we're only asked to print the settings, do that and exit. These are settings
|
||||
# for clickhouse-benchmark, so we print them as command line arguments, e.g.
|
||||
# '--max_memory_usage=10000000'.
|
||||
if args.print_settings:
|
||||
for s in root.findall('settings/*'):
|
||||
print(f'--{s.tag}={s.text}')
|
||||
for s in root.findall("settings/*"):
|
||||
print(f"--{s.tag}={s.text}")
|
||||
|
||||
exit(0)
|
||||
|
||||
# Skip long tests
|
||||
if not args.long:
|
||||
for tag in root.findall('.//tag'):
|
||||
if tag.text == 'long':
|
||||
print('skipped\tTest is tagged as long.')
|
||||
for tag in root.findall(".//tag"):
|
||||
if tag.text == "long":
|
||||
print("skipped\tTest is tagged as long.")
|
||||
sys.exit(0)
|
||||
|
||||
# Print report threshold for the test if it is set.
|
||||
ignored_relative_change = 0.05
|
||||
if 'max_ignored_relative_change' in root.attrib:
|
||||
if "max_ignored_relative_change" in root.attrib:
|
||||
ignored_relative_change = float(root.attrib["max_ignored_relative_change"])
|
||||
print(f'report-threshold\t{ignored_relative_change}')
|
||||
print(f"report-threshold\t{ignored_relative_change}")
|
||||
|
||||
reportStageEnd('before-connect')
|
||||
reportStageEnd("before-connect")
|
||||
|
||||
# Open connections
|
||||
servers = [{'host': host or args.host[0], 'port': port or args.port[0]} for (host, port) in itertools.zip_longest(args.host, args.port)]
|
||||
servers = [
|
||||
{"host": host or args.host[0], "port": port or args.port[0]}
|
||||
for (host, port) in itertools.zip_longest(args.host, args.port)
|
||||
]
|
||||
# Force settings_is_important to fail queries on unknown settings.
|
||||
all_connections = [clickhouse_driver.Client(**server, settings_is_important=True) for server in servers]
|
||||
all_connections = [
|
||||
clickhouse_driver.Client(**server, settings_is_important=True) for server in servers
|
||||
]
|
||||
|
||||
for i, s in enumerate(servers):
|
||||
print(f'server\t{i}\t{s["host"]}\t{s["port"]}')
|
||||
|
||||
reportStageEnd('connect')
|
||||
reportStageEnd("connect")
|
||||
|
||||
if not args.use_existing_tables:
|
||||
# Run drop queries, ignoring errors. Do this before all other activity,
|
||||
# because clickhouse_driver disconnects on error (this is not configurable),
|
||||
# and the new connection loses the changes in settings.
|
||||
drop_query_templates = [q.text for q in root.findall('drop_query')]
|
||||
drop_query_templates = [q.text for q in root.findall("drop_query")]
|
||||
drop_queries = substitute_parameters(drop_query_templates)
|
||||
for conn_index, c in enumerate(all_connections):
|
||||
for q in drop_queries:
|
||||
try:
|
||||
c.execute(q)
|
||||
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
|
||||
print(f"drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}")
|
||||
except:
|
||||
pass
|
||||
|
||||
reportStageEnd('drop-1')
|
||||
reportStageEnd("drop-1")
|
||||
|
||||
# Apply settings.
|
||||
settings = root.findall('settings/*')
|
||||
settings = root.findall("settings/*")
|
||||
for conn_index, c in enumerate(all_connections):
|
||||
for s in settings:
|
||||
# requires clickhouse-driver >= 1.1.5 to accept arbitrary new settings
|
||||
@ -189,48 +266,52 @@ for conn_index, c in enumerate(all_connections):
|
||||
# the test, which is wrong.
|
||||
c.execute("select 1")
|
||||
|
||||
reportStageEnd('settings')
|
||||
reportStageEnd("settings")
|
||||
|
||||
# Check tables that should exist. If they don't exist, just skip this test.
|
||||
tables = [e.text for e in root.findall('preconditions/table_exists')]
|
||||
tables = [e.text for e in root.findall("preconditions/table_exists")]
|
||||
for t in tables:
|
||||
for c in all_connections:
|
||||
try:
|
||||
res = c.execute("select 1 from {} limit 1".format(t))
|
||||
except:
|
||||
exception_message = traceback.format_exception_only(*sys.exc_info()[:2])[-1]
|
||||
skipped_message = ' '.join(exception_message.split('\n')[:2])
|
||||
print(f'skipped\t{tsv_escape(skipped_message)}')
|
||||
skipped_message = " ".join(exception_message.split("\n")[:2])
|
||||
print(f"skipped\t{tsv_escape(skipped_message)}")
|
||||
sys.exit(0)
|
||||
|
||||
reportStageEnd('preconditions')
|
||||
reportStageEnd("preconditions")
|
||||
|
||||
if not args.use_existing_tables:
|
||||
# Run create and fill queries. We will run them simultaneously for both
|
||||
# servers, to save time. The weird XML search + filter is because we want to
|
||||
# keep the relative order of elements, and etree doesn't support the
|
||||
# appropriate xpath query.
|
||||
create_query_templates = [q.text for q in root.findall('./*')
|
||||
if q.tag in ('create_query', 'fill_query')]
|
||||
create_query_templates = [
|
||||
q.text for q in root.findall("./*") if q.tag in ("create_query", "fill_query")
|
||||
]
|
||||
create_queries = substitute_parameters(create_query_templates)
|
||||
|
||||
# Disallow temporary tables, because the clickhouse_driver reconnects on
|
||||
# errors, and temporary tables are destroyed. We want to be able to continue
|
||||
# after some errors.
|
||||
for q in create_queries:
|
||||
if re.search('create temporary table', q, flags=re.IGNORECASE):
|
||||
print(f"Temporary tables are not allowed in performance tests: '{q}'",
|
||||
file = sys.stderr)
|
||||
if re.search("create temporary table", q, flags=re.IGNORECASE):
|
||||
print(
|
||||
f"Temporary tables are not allowed in performance tests: '{q}'",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
def do_create(connection, index, queries):
|
||||
for q in queries:
|
||||
connection.execute(q)
|
||||
print(f'create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}')
|
||||
print(f"create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}")
|
||||
|
||||
threads = [
|
||||
Thread(target = do_create, args = (connection, index, create_queries))
|
||||
for index, connection in enumerate(all_connections)]
|
||||
Thread(target=do_create, args=(connection, index, create_queries))
|
||||
for index, connection in enumerate(all_connections)
|
||||
]
|
||||
|
||||
for t in threads:
|
||||
t.start()
|
||||
@ -238,14 +319,16 @@ if not args.use_existing_tables:
|
||||
for t in threads:
|
||||
t.join()
|
||||
|
||||
reportStageEnd('create')
|
||||
reportStageEnd("create")
|
||||
|
||||
# By default, test all queries.
|
||||
queries_to_run = range(0, len(test_queries))
|
||||
|
||||
if args.max_queries:
|
||||
# If specified, test a limited number of queries chosen at random.
|
||||
queries_to_run = random.sample(range(0, len(test_queries)), min(len(test_queries), args.max_queries))
|
||||
queries_to_run = random.sample(
|
||||
range(0, len(test_queries)), min(len(test_queries), args.max_queries)
|
||||
)
|
||||
|
||||
if args.queries_to_run:
|
||||
# Run the specified queries.
|
||||
@ -255,16 +338,16 @@ if args.queries_to_run:
|
||||
profile_total_seconds = 0
|
||||
for query_index in queries_to_run:
|
||||
q = test_queries[query_index]
|
||||
query_prefix = f'{test_name}.query{query_index}'
|
||||
query_prefix = f"{test_name}.query{query_index}"
|
||||
|
||||
# We have some crazy long queries (about 100kB), so trim them to a sane
|
||||
# length. This means we can't use query text as an identifier and have to
|
||||
# use the test name + the test-wide query index.
|
||||
query_display_name = q
|
||||
if len(query_display_name) > 1000:
|
||||
query_display_name = f'{query_display_name[:1000]}...({query_index})'
|
||||
query_display_name = f"{query_display_name[:1000]}...({query_index})"
|
||||
|
||||
print(f'display-name\t{query_index}\t{tsv_escape(query_display_name)}')
|
||||
print(f"display-name\t{query_index}\t{tsv_escape(query_display_name)}")
|
||||
|
||||
# Prewarm: run once on both servers. Helps to bring the data into memory,
|
||||
# precompile the queries, etc.
|
||||
@ -272,10 +355,10 @@ for query_index in queries_to_run:
|
||||
# new one. We want to run them on the new server only, so that the PR author
|
||||
# can ensure that the test works properly. Remember the errors we had on
|
||||
# each server.
|
||||
query_error_on_connection = [None] * len(all_connections);
|
||||
query_error_on_connection = [None] * len(all_connections)
|
||||
for conn_index, c in enumerate(all_connections):
|
||||
try:
|
||||
prewarm_id = f'{query_prefix}.prewarm0'
|
||||
prewarm_id = f"{query_prefix}.prewarm0"
|
||||
|
||||
try:
|
||||
# During the warmup runs, we will also:
|
||||
@ -283,25 +366,30 @@ for query_index in queries_to_run:
|
||||
# * collect profiler traces, which might be helpful for analyzing
|
||||
# test coverage. We disable profiler for normal runs because
|
||||
# it makes the results unstable.
|
||||
res = c.execute(q, query_id = prewarm_id,
|
||||
settings = {
|
||||
'max_execution_time': args.prewarm_max_query_seconds,
|
||||
'query_profiler_real_time_period_ns': 10000000,
|
||||
'memory_profiler_step': '4Mi',
|
||||
})
|
||||
res = c.execute(
|
||||
q,
|
||||
query_id=prewarm_id,
|
||||
settings={
|
||||
"max_execution_time": args.prewarm_max_query_seconds,
|
||||
"query_profiler_real_time_period_ns": 10000000,
|
||||
"memory_profiler_step": "4Mi",
|
||||
},
|
||||
)
|
||||
except clickhouse_driver.errors.Error as e:
|
||||
# Add query id to the exception to make debugging easier.
|
||||
e.args = (prewarm_id, *e.args)
|
||||
e.message = prewarm_id + ': ' + e.message
|
||||
e.message = prewarm_id + ": " + e.message
|
||||
raise
|
||||
|
||||
print(f'prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}')
|
||||
print(
|
||||
f"prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}"
|
||||
)
|
||||
except KeyboardInterrupt:
|
||||
raise
|
||||
except:
|
||||
# FIXME the driver reconnects on error and we lose settings, so this
|
||||
# might lead to further errors or unexpected behavior.
|
||||
query_error_on_connection[conn_index] = traceback.format_exc();
|
||||
query_error_on_connection[conn_index] = traceback.format_exc()
|
||||
continue
|
||||
|
||||
# Report all errors that ocurred during prewarm and decide what to do next.
|
||||
@ -311,14 +399,14 @@ for query_index in queries_to_run:
|
||||
no_errors = []
|
||||
for i, e in enumerate(query_error_on_connection):
|
||||
if e:
|
||||
print(e, file = sys.stderr)
|
||||
print(e, file=sys.stderr)
|
||||
else:
|
||||
no_errors.append(i)
|
||||
|
||||
if len(no_errors) == 0:
|
||||
continue
|
||||
elif len(no_errors) < len(all_connections):
|
||||
print(f'partial\t{query_index}\t{no_errors}')
|
||||
print(f"partial\t{query_index}\t{no_errors}")
|
||||
|
||||
this_query_connections = [all_connections[index] for index in no_errors]
|
||||
|
||||
@ -337,27 +425,34 @@ for query_index in queries_to_run:
|
||||
all_server_times.append([])
|
||||
|
||||
while True:
|
||||
run_id = f'{query_prefix}.run{run}'
|
||||
run_id = f"{query_prefix}.run{run}"
|
||||
|
||||
for conn_index, c in enumerate(this_query_connections):
|
||||
try:
|
||||
res = c.execute(q, query_id = run_id, settings = {'max_execution_time': args.max_query_seconds})
|
||||
res = c.execute(
|
||||
q,
|
||||
query_id=run_id,
|
||||
settings={"max_execution_time": args.max_query_seconds},
|
||||
)
|
||||
except clickhouse_driver.errors.Error as e:
|
||||
# Add query id to the exception to make debugging easier.
|
||||
e.args = (run_id, *e.args)
|
||||
e.message = run_id + ': ' + e.message
|
||||
e.message = run_id + ": " + e.message
|
||||
raise
|
||||
|
||||
elapsed = c.last_query.elapsed
|
||||
all_server_times[conn_index].append(elapsed)
|
||||
|
||||
server_seconds += elapsed
|
||||
print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{elapsed}')
|
||||
print(f"query\t{query_index}\t{run_id}\t{conn_index}\t{elapsed}")
|
||||
|
||||
if elapsed > args.max_query_seconds:
|
||||
# Do not stop processing pathologically slow queries,
|
||||
# since this may hide errors in other queries.
|
||||
print(f'The query no. {query_index} is taking too long to run ({elapsed} s)', file=sys.stderr)
|
||||
print(
|
||||
f"The query no. {query_index} is taking too long to run ({elapsed} s)",
|
||||
file=sys.stderr,
|
||||
)
|
||||
|
||||
# Be careful with the counter, after this line it's the next iteration
|
||||
# already.
|
||||
@ -386,7 +481,7 @@ for query_index in queries_to_run:
|
||||
break
|
||||
|
||||
client_seconds = time.perf_counter() - start_seconds
|
||||
print(f'client-time\t{query_index}\t{client_seconds}\t{server_seconds}')
|
||||
print(f"client-time\t{query_index}\t{client_seconds}\t{server_seconds}")
|
||||
|
||||
# Run additional profiling queries to collect profile data, but only if test times appeared to be different.
|
||||
# We have to do it after normal runs because otherwise it will affect test statistics too much
|
||||
@ -397,13 +492,15 @@ for query_index in queries_to_run:
|
||||
# Don't fail if for some reason there are not enough measurements.
|
||||
continue
|
||||
|
||||
pvalue = stats.ttest_ind(all_server_times[0], all_server_times[1], equal_var = False).pvalue
|
||||
pvalue = stats.ttest_ind(
|
||||
all_server_times[0], all_server_times[1], equal_var=False
|
||||
).pvalue
|
||||
median = [statistics.median(t) for t in all_server_times]
|
||||
# Keep this consistent with the value used in report. Should eventually move
|
||||
# to (median[1] - median[0]) / min(median), which is compatible with "times"
|
||||
# difference we use in report (max(median) / min(median)).
|
||||
relative_diff = (median[1] - median[0]) / median[0]
|
||||
print(f'diff\t{query_index}\t{median[0]}\t{median[1]}\t{relative_diff}\t{pvalue}')
|
||||
print(f"diff\t{query_index}\t{median[0]}\t{median[1]}\t{relative_diff}\t{pvalue}")
|
||||
if abs(relative_diff) < ignored_relative_change or pvalue > 0.05:
|
||||
continue
|
||||
|
||||
@ -412,25 +509,31 @@ for query_index in queries_to_run:
|
||||
profile_start_seconds = time.perf_counter()
|
||||
run = 0
|
||||
while time.perf_counter() - profile_start_seconds < args.profile_seconds:
|
||||
run_id = f'{query_prefix}.profile{run}'
|
||||
run_id = f"{query_prefix}.profile{run}"
|
||||
|
||||
for conn_index, c in enumerate(this_query_connections):
|
||||
try:
|
||||
res = c.execute(q, query_id = run_id, settings = {'query_profiler_real_time_period_ns': 10000000})
|
||||
print(f'profile\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}')
|
||||
res = c.execute(
|
||||
q,
|
||||
query_id=run_id,
|
||||
settings={"query_profiler_real_time_period_ns": 10000000},
|
||||
)
|
||||
print(
|
||||
f"profile\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}"
|
||||
)
|
||||
except clickhouse_driver.errors.Error as e:
|
||||
# Add query id to the exception to make debugging easier.
|
||||
e.args = (run_id, *e.args)
|
||||
e.message = run_id + ': ' + e.message
|
||||
e.message = run_id + ": " + e.message
|
||||
raise
|
||||
|
||||
run += 1
|
||||
|
||||
profile_total_seconds += time.perf_counter() - profile_start_seconds
|
||||
|
||||
print(f'profile-total\t{profile_total_seconds}')
|
||||
print(f"profile-total\t{profile_total_seconds}")
|
||||
|
||||
reportStageEnd('run')
|
||||
reportStageEnd("run")
|
||||
|
||||
# Run drop queries
|
||||
if not args.keep_created_tables and not args.use_existing_tables:
|
||||
@ -438,6 +541,6 @@ if not args.keep_created_tables and not args.use_existing_tables:
|
||||
for conn_index, c in enumerate(all_connections):
|
||||
for q in drop_queries:
|
||||
c.execute(q)
|
||||
print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
|
||||
print(f"drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}")
|
||||
|
||||
reportStageEnd('drop-2')
|
||||
reportStageEnd("drop-2")
|
||||
|
@ -12,9 +12,13 @@ import pprint
|
||||
import sys
|
||||
import traceback
|
||||
|
||||
parser = argparse.ArgumentParser(description='Create performance test report')
|
||||
parser.add_argument('--report', default='main', choices=['main', 'all-queries'],
|
||||
help='Which report to build')
|
||||
parser = argparse.ArgumentParser(description="Create performance test report")
|
||||
parser.add_argument(
|
||||
"--report",
|
||||
default="main",
|
||||
choices=["main", "all-queries"],
|
||||
help="Which report to build",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
tables = []
|
||||
@ -31,8 +35,8 @@ unstable_partial_queries = 0
|
||||
# max seconds to run one query by itself, not counting preparation
|
||||
allowed_single_run_time = 2
|
||||
|
||||
color_bad='#ffb0c0'
|
||||
color_good='#b0d050'
|
||||
color_bad = "#ffb0c0"
|
||||
color_good = "#b0d050"
|
||||
|
||||
header_template = """
|
||||
<!DOCTYPE html>
|
||||
@ -151,24 +155,29 @@ tr:nth-child(odd) td {{filter: brightness(90%);}}
|
||||
table_anchor = 0
|
||||
row_anchor = 0
|
||||
|
||||
|
||||
def currentTableAnchor():
|
||||
global table_anchor
|
||||
return f'{table_anchor}'
|
||||
return f"{table_anchor}"
|
||||
|
||||
|
||||
def newTableAnchor():
|
||||
global table_anchor
|
||||
table_anchor += 1
|
||||
return currentTableAnchor()
|
||||
|
||||
|
||||
def currentRowAnchor():
|
||||
global row_anchor
|
||||
global table_anchor
|
||||
return f'{table_anchor}.{row_anchor}'
|
||||
return f"{table_anchor}.{row_anchor}"
|
||||
|
||||
|
||||
def nextRowAnchor():
|
||||
global row_anchor
|
||||
global table_anchor
|
||||
return f'{table_anchor}.{row_anchor + 1}'
|
||||
return f"{table_anchor}.{row_anchor + 1}"
|
||||
|
||||
|
||||
def advanceRowAnchor():
|
||||
global row_anchor
|
||||
@ -178,43 +187,58 @@ def advanceRowAnchor():
|
||||
|
||||
|
||||
def tr(x, anchor=None):
|
||||
#return '<tr onclick="location.href=\'#{a}\'" id={a}>{x}</tr>'.format(a=a, x=str(x))
|
||||
# return '<tr onclick="location.href=\'#{a}\'" id={a}>{x}</tr>'.format(a=a, x=str(x))
|
||||
anchor = anchor if anchor else advanceRowAnchor()
|
||||
return f'<tr id={anchor}>{x}</tr>'
|
||||
return f"<tr id={anchor}>{x}</tr>"
|
||||
|
||||
def td(value, cell_attributes = ''):
|
||||
return '<td {cell_attributes}>{value}</td>'.format(
|
||||
cell_attributes = cell_attributes,
|
||||
value = value)
|
||||
|
||||
def th(value, cell_attributes = ''):
|
||||
return '<th {cell_attributes}>{value}</th>'.format(
|
||||
cell_attributes = cell_attributes,
|
||||
value = value)
|
||||
def td(value, cell_attributes=""):
|
||||
return "<td {cell_attributes}>{value}</td>".format(
|
||||
cell_attributes=cell_attributes, value=value
|
||||
)
|
||||
|
||||
def tableRow(cell_values, cell_attributes = [], anchor=None):
|
||||
|
||||
def th(value, cell_attributes=""):
|
||||
return "<th {cell_attributes}>{value}</th>".format(
|
||||
cell_attributes=cell_attributes, value=value
|
||||
)
|
||||
|
||||
|
||||
def tableRow(cell_values, cell_attributes=[], anchor=None):
|
||||
return tr(
|
||||
''.join([td(v, a)
|
||||
for v, a in itertools.zip_longest(
|
||||
cell_values, cell_attributes,
|
||||
fillvalue = '')
|
||||
if a is not None and v is not None]),
|
||||
anchor)
|
||||
"".join(
|
||||
[
|
||||
td(v, a)
|
||||
for v, a in itertools.zip_longest(
|
||||
cell_values, cell_attributes, fillvalue=""
|
||||
)
|
||||
if a is not None and v is not None
|
||||
]
|
||||
),
|
||||
anchor,
|
||||
)
|
||||
|
||||
def tableHeader(cell_values, cell_attributes = []):
|
||||
|
||||
def tableHeader(cell_values, cell_attributes=[]):
|
||||
return tr(
|
||||
''.join([th(v, a)
|
||||
for v, a in itertools.zip_longest(
|
||||
cell_values, cell_attributes,
|
||||
fillvalue = '')
|
||||
if a is not None and v is not None]))
|
||||
"".join(
|
||||
[
|
||||
th(v, a)
|
||||
for v, a in itertools.zip_longest(
|
||||
cell_values, cell_attributes, fillvalue=""
|
||||
)
|
||||
if a is not None and v is not None
|
||||
]
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def tableStart(title):
|
||||
cls = '-'.join(title.lower().split(' ')[:3]);
|
||||
cls = "-".join(title.lower().split(" ")[:3])
|
||||
global table_anchor
|
||||
table_anchor = cls
|
||||
anchor = currentTableAnchor()
|
||||
help_anchor = '-'.join(title.lower().split(' '));
|
||||
help_anchor = "-".join(title.lower().split(" "))
|
||||
return f"""
|
||||
<h2 id="{anchor}">
|
||||
<a class="cancela" href="#{anchor}">{title}</a>
|
||||
@ -223,12 +247,14 @@ def tableStart(title):
|
||||
<table class="{cls}">
|
||||
"""
|
||||
|
||||
|
||||
def tableEnd():
|
||||
return '</table>'
|
||||
return "</table>"
|
||||
|
||||
|
||||
def tsvRows(n):
|
||||
try:
|
||||
with open(n, encoding='utf-8') as fd:
|
||||
with open(n, encoding="utf-8") as fd:
|
||||
result = []
|
||||
for row in csv.reader(fd, delimiter="\t", quoting=csv.QUOTE_NONE):
|
||||
new_row = []
|
||||
@ -237,27 +263,32 @@ def tsvRows(n):
|
||||
# The second one (encode('latin1').decode('utf-8')) fixes the changes with unicode vs utf-8 chars, so
|
||||
# 'Чем зÐ<C2B7>нимаеÑ<C2B5>ЬÑ<C2AC>Ñ<EFBFBD>' is transformed back into 'Чем зАнимаешЬся'.
|
||||
|
||||
new_row.append(e.encode('utf-8').decode('unicode-escape').encode('latin1').decode('utf-8'))
|
||||
new_row.append(
|
||||
e.encode("utf-8")
|
||||
.decode("unicode-escape")
|
||||
.encode("latin1")
|
||||
.decode("utf-8")
|
||||
)
|
||||
result.append(new_row)
|
||||
return result
|
||||
|
||||
except:
|
||||
report_errors.append(
|
||||
traceback.format_exception_only(
|
||||
*sys.exc_info()[:2])[-1])
|
||||
report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1])
|
||||
pass
|
||||
return []
|
||||
|
||||
|
||||
def htmlRows(n):
|
||||
rawRows = tsvRows(n)
|
||||
result = ''
|
||||
result = ""
|
||||
for row in rawRows:
|
||||
result += tableRow(row)
|
||||
return result
|
||||
|
||||
|
||||
def addSimpleTable(caption, columns, rows, pos=None):
|
||||
global tables
|
||||
text = ''
|
||||
text = ""
|
||||
if not rows:
|
||||
return
|
||||
|
||||
@ -268,51 +299,63 @@ def addSimpleTable(caption, columns, rows, pos=None):
|
||||
text += tableEnd()
|
||||
tables.insert(pos if pos else len(tables), text)
|
||||
|
||||
|
||||
def add_tested_commits():
|
||||
global report_errors
|
||||
try:
|
||||
addSimpleTable('Tested Commits', ['Old', 'New'],
|
||||
[['<pre>{}</pre>'.format(x) for x in
|
||||
[open('left-commit.txt').read(),
|
||||
open('right-commit.txt').read()]]])
|
||||
addSimpleTable(
|
||||
"Tested Commits",
|
||||
["Old", "New"],
|
||||
[
|
||||
[
|
||||
"<pre>{}</pre>".format(x)
|
||||
for x in [
|
||||
open("left-commit.txt").read(),
|
||||
open("right-commit.txt").read(),
|
||||
]
|
||||
]
|
||||
],
|
||||
)
|
||||
except:
|
||||
# Don't fail if no commit info -- maybe it's a manual run.
|
||||
report_errors.append(
|
||||
traceback.format_exception_only(
|
||||
*sys.exc_info()[:2])[-1])
|
||||
report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1])
|
||||
pass
|
||||
|
||||
|
||||
def add_report_errors():
|
||||
global tables
|
||||
global report_errors
|
||||
# Add the errors reported by various steps of comparison script
|
||||
try:
|
||||
report_errors += [l.strip() for l in open('report/errors.log')]
|
||||
report_errors += [l.strip() for l in open("report/errors.log")]
|
||||
except:
|
||||
report_errors.append(
|
||||
traceback.format_exception_only(
|
||||
*sys.exc_info()[:2])[-1])
|
||||
report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1])
|
||||
pass
|
||||
|
||||
if not report_errors:
|
||||
return
|
||||
|
||||
text = tableStart('Errors while Building the Report')
|
||||
text += tableHeader(['Error'])
|
||||
text = tableStart("Errors while Building the Report")
|
||||
text += tableHeader(["Error"])
|
||||
for x in report_errors:
|
||||
text += tableRow([x])
|
||||
text += tableEnd()
|
||||
# Insert after Tested Commits
|
||||
tables.insert(1, text)
|
||||
errors_explained.append([f'<a href="#{currentTableAnchor()}">There were some errors while building the report</a>']);
|
||||
errors_explained.append(
|
||||
[
|
||||
f'<a href="#{currentTableAnchor()}">There were some errors while building the report</a>'
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
def add_errors_explained():
|
||||
if not errors_explained:
|
||||
return
|
||||
|
||||
text = '<a name="fail1"/>'
|
||||
text += tableStart('Error Summary')
|
||||
text += tableHeader(['Description'])
|
||||
text += tableStart("Error Summary")
|
||||
text += tableHeader(["Description"])
|
||||
for row in errors_explained:
|
||||
text += tableRow(row)
|
||||
text += tableEnd()
|
||||
@ -321,59 +364,81 @@ def add_errors_explained():
|
||||
tables.insert(1, text)
|
||||
|
||||
|
||||
if args.report == 'main':
|
||||
if args.report == "main":
|
||||
print((header_template.format()))
|
||||
|
||||
add_tested_commits()
|
||||
|
||||
|
||||
run_error_rows = tsvRows('run-errors.tsv')
|
||||
run_error_rows = tsvRows("run-errors.tsv")
|
||||
error_tests += len(run_error_rows)
|
||||
addSimpleTable('Run Errors', ['Test', 'Error'], run_error_rows)
|
||||
addSimpleTable("Run Errors", ["Test", "Error"], run_error_rows)
|
||||
if run_error_rows:
|
||||
errors_explained.append([f'<a href="#{currentTableAnchor()}">There were some errors while running the tests</a>']);
|
||||
errors_explained.append(
|
||||
[
|
||||
f'<a href="#{currentTableAnchor()}">There were some errors while running the tests</a>'
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
slow_on_client_rows = tsvRows('report/slow-on-client.tsv')
|
||||
slow_on_client_rows = tsvRows("report/slow-on-client.tsv")
|
||||
error_tests += len(slow_on_client_rows)
|
||||
addSimpleTable('Slow on Client',
|
||||
['Client time, s', 'Server time, s', 'Ratio', 'Test', 'Query'],
|
||||
slow_on_client_rows)
|
||||
addSimpleTable(
|
||||
"Slow on Client",
|
||||
["Client time, s", "Server time, s", "Ratio", "Test", "Query"],
|
||||
slow_on_client_rows,
|
||||
)
|
||||
if slow_on_client_rows:
|
||||
errors_explained.append([f'<a href="#{currentTableAnchor()}">Some queries are taking noticeable time client-side (missing `FORMAT Null`?)</a>']);
|
||||
errors_explained.append(
|
||||
[
|
||||
f'<a href="#{currentTableAnchor()}">Some queries are taking noticeable time client-side (missing `FORMAT Null`?)</a>'
|
||||
]
|
||||
)
|
||||
|
||||
unmarked_short_rows = tsvRows('report/unexpected-query-duration.tsv')
|
||||
unmarked_short_rows = tsvRows("report/unexpected-query-duration.tsv")
|
||||
error_tests += len(unmarked_short_rows)
|
||||
addSimpleTable('Unexpected Query Duration',
|
||||
['Problem', 'Marked as "short"?', 'Run time, s', 'Test', '#', 'Query'],
|
||||
unmarked_short_rows)
|
||||
addSimpleTable(
|
||||
"Unexpected Query Duration",
|
||||
["Problem", 'Marked as "short"?', "Run time, s", "Test", "#", "Query"],
|
||||
unmarked_short_rows,
|
||||
)
|
||||
if unmarked_short_rows:
|
||||
errors_explained.append([f'<a href="#{currentTableAnchor()}">Some queries have unexpected duration</a>']);
|
||||
errors_explained.append(
|
||||
[
|
||||
f'<a href="#{currentTableAnchor()}">Some queries have unexpected duration</a>'
|
||||
]
|
||||
)
|
||||
|
||||
def add_partial():
|
||||
rows = tsvRows('report/partial-queries-report.tsv')
|
||||
rows = tsvRows("report/partial-queries-report.tsv")
|
||||
if not rows:
|
||||
return
|
||||
|
||||
global unstable_partial_queries, slow_average_tests, tables
|
||||
text = tableStart('Partial Queries')
|
||||
columns = ['Median time, s', 'Relative time variance', 'Test', '#', 'Query']
|
||||
text = tableStart("Partial Queries")
|
||||
columns = ["Median time, s", "Relative time variance", "Test", "#", "Query"]
|
||||
text += tableHeader(columns)
|
||||
attrs = ['' for c in columns]
|
||||
attrs = ["" for c in columns]
|
||||
for row in rows:
|
||||
anchor = f'{currentTableAnchor()}.{row[2]}.{row[3]}'
|
||||
anchor = f"{currentTableAnchor()}.{row[2]}.{row[3]}"
|
||||
if float(row[1]) > 0.10:
|
||||
attrs[1] = f'style="background: {color_bad}"'
|
||||
unstable_partial_queries += 1
|
||||
errors_explained.append([f'<a href="#{anchor}">The query no. {row[3]} of test \'{row[2]}\' has excessive variance of run time. Keep it below 10%</a>'])
|
||||
errors_explained.append(
|
||||
[
|
||||
f"<a href=\"#{anchor}\">The query no. {row[3]} of test '{row[2]}' has excessive variance of run time. Keep it below 10%</a>"
|
||||
]
|
||||
)
|
||||
else:
|
||||
attrs[1] = ''
|
||||
attrs[1] = ""
|
||||
if float(row[0]) > allowed_single_run_time:
|
||||
attrs[0] = f'style="background: {color_bad}"'
|
||||
errors_explained.append([f'<a href="#{anchor}">The query no. {row[3]} of test \'{row[2]}\' is taking too long to run. Keep the run time below {allowed_single_run_time} seconds"</a>'])
|
||||
errors_explained.append(
|
||||
[
|
||||
f'<a href="#{anchor}">The query no. {row[3]} of test \'{row[2]}\' is taking too long to run. Keep the run time below {allowed_single_run_time} seconds"</a>'
|
||||
]
|
||||
)
|
||||
slow_average_tests += 1
|
||||
else:
|
||||
attrs[0] = ''
|
||||
attrs[0] = ""
|
||||
text += tableRow(row, attrs, anchor)
|
||||
text += tableEnd()
|
||||
tables.append(text)
|
||||
@ -381,41 +446,45 @@ if args.report == 'main':
|
||||
add_partial()
|
||||
|
||||
def add_changes():
|
||||
rows = tsvRows('report/changed-perf.tsv')
|
||||
rows = tsvRows("report/changed-perf.tsv")
|
||||
if not rows:
|
||||
return
|
||||
|
||||
global faster_queries, slower_queries, tables
|
||||
|
||||
text = tableStart('Changes in Performance')
|
||||
text = tableStart("Changes in Performance")
|
||||
columns = [
|
||||
'Old, s', # 0
|
||||
'New, s', # 1
|
||||
'Ratio of speedup (-) or slowdown (+)', # 2
|
||||
'Relative difference (new − old) / old', # 3
|
||||
'p < 0.01 threshold', # 4
|
||||
'', # Failed # 5
|
||||
'Test', # 6
|
||||
'#', # 7
|
||||
'Query', # 8
|
||||
]
|
||||
attrs = ['' for c in columns]
|
||||
"Old, s", # 0
|
||||
"New, s", # 1
|
||||
"Ratio of speedup (-) or slowdown (+)", # 2
|
||||
"Relative difference (new − old) / old", # 3
|
||||
"p < 0.01 threshold", # 4
|
||||
"", # Failed # 5
|
||||
"Test", # 6
|
||||
"#", # 7
|
||||
"Query", # 8
|
||||
]
|
||||
attrs = ["" for c in columns]
|
||||
attrs[5] = None
|
||||
|
||||
text += tableHeader(columns, attrs)
|
||||
|
||||
for row in rows:
|
||||
anchor = f'{currentTableAnchor()}.{row[6]}.{row[7]}'
|
||||
anchor = f"{currentTableAnchor()}.{row[6]}.{row[7]}"
|
||||
if int(row[5]):
|
||||
if float(row[3]) < 0.:
|
||||
if float(row[3]) < 0.0:
|
||||
faster_queries += 1
|
||||
attrs[2] = attrs[3] = f'style="background: {color_good}"'
|
||||
else:
|
||||
slower_queries += 1
|
||||
attrs[2] = attrs[3] = f'style="background: {color_bad}"'
|
||||
errors_explained.append([f'<a href="#{anchor}">The query no. {row[7]} of test \'{row[6]}\' has slowed down</a>'])
|
||||
errors_explained.append(
|
||||
[
|
||||
f"<a href=\"#{anchor}\">The query no. {row[7]} of test '{row[6]}' has slowed down</a>"
|
||||
]
|
||||
)
|
||||
else:
|
||||
attrs[2] = attrs[3] = ''
|
||||
attrs[2] = attrs[3] = ""
|
||||
|
||||
text += tableRow(row, attrs, anchor)
|
||||
|
||||
@ -427,35 +496,35 @@ if args.report == 'main':
|
||||
def add_unstable_queries():
|
||||
global unstable_queries, very_unstable_queries, tables
|
||||
|
||||
unstable_rows = tsvRows('report/unstable-queries.tsv')
|
||||
unstable_rows = tsvRows("report/unstable-queries.tsv")
|
||||
if not unstable_rows:
|
||||
return
|
||||
|
||||
unstable_queries += len(unstable_rows)
|
||||
|
||||
columns = [
|
||||
'Old, s', #0
|
||||
'New, s', #1
|
||||
'Relative difference (new - old)/old', #2
|
||||
'p < 0.01 threshold', #3
|
||||
'', # Failed #4
|
||||
'Test', #5
|
||||
'#', #6
|
||||
'Query' #7
|
||||
"Old, s", # 0
|
||||
"New, s", # 1
|
||||
"Relative difference (new - old)/old", # 2
|
||||
"p < 0.01 threshold", # 3
|
||||
"", # Failed #4
|
||||
"Test", # 5
|
||||
"#", # 6
|
||||
"Query", # 7
|
||||
]
|
||||
attrs = ['' for c in columns]
|
||||
attrs = ["" for c in columns]
|
||||
attrs[4] = None
|
||||
|
||||
text = tableStart('Unstable Queries')
|
||||
text = tableStart("Unstable Queries")
|
||||
text += tableHeader(columns, attrs)
|
||||
|
||||
for r in unstable_rows:
|
||||
anchor = f'{currentTableAnchor()}.{r[5]}.{r[6]}'
|
||||
anchor = f"{currentTableAnchor()}.{r[5]}.{r[6]}"
|
||||
if int(r[4]):
|
||||
very_unstable_queries += 1
|
||||
attrs[3] = f'style="background: {color_bad}"'
|
||||
else:
|
||||
attrs[3] = ''
|
||||
attrs[3] = ""
|
||||
# Just don't add the slightly unstable queries we don't consider
|
||||
# errors. It's not clear what the user should do with them.
|
||||
continue
|
||||
@ -470,53 +539,70 @@ if args.report == 'main':
|
||||
|
||||
add_unstable_queries()
|
||||
|
||||
skipped_tests_rows = tsvRows('analyze/skipped-tests.tsv')
|
||||
addSimpleTable('Skipped Tests', ['Test', 'Reason'], skipped_tests_rows)
|
||||
skipped_tests_rows = tsvRows("analyze/skipped-tests.tsv")
|
||||
addSimpleTable("Skipped Tests", ["Test", "Reason"], skipped_tests_rows)
|
||||
|
||||
addSimpleTable('Test Performance Changes',
|
||||
['Test', 'Ratio of speedup (-) or slowdown (+)', 'Queries', 'Total not OK', 'Changed perf', 'Unstable'],
|
||||
tsvRows('report/test-perf-changes.tsv'))
|
||||
addSimpleTable(
|
||||
"Test Performance Changes",
|
||||
[
|
||||
"Test",
|
||||
"Ratio of speedup (-) or slowdown (+)",
|
||||
"Queries",
|
||||
"Total not OK",
|
||||
"Changed perf",
|
||||
"Unstable",
|
||||
],
|
||||
tsvRows("report/test-perf-changes.tsv"),
|
||||
)
|
||||
|
||||
def add_test_times():
|
||||
global slow_average_tests, tables
|
||||
rows = tsvRows('report/test-times.tsv')
|
||||
rows = tsvRows("report/test-times.tsv")
|
||||
if not rows:
|
||||
return
|
||||
|
||||
columns = [
|
||||
'Test', #0
|
||||
'Wall clock time, entire test, s', #1
|
||||
'Total client time for measured query runs, s', #2
|
||||
'Queries', #3
|
||||
'Longest query, total for measured runs, s', #4
|
||||
'Wall clock time per query, s', #5
|
||||
'Shortest query, total for measured runs, s', #6
|
||||
'', # Runs #7
|
||||
]
|
||||
attrs = ['' for c in columns]
|
||||
"Test", # 0
|
||||
"Wall clock time, entire test, s", # 1
|
||||
"Total client time for measured query runs, s", # 2
|
||||
"Queries", # 3
|
||||
"Longest query, total for measured runs, s", # 4
|
||||
"Wall clock time per query, s", # 5
|
||||
"Shortest query, total for measured runs, s", # 6
|
||||
"", # Runs #7
|
||||
]
|
||||
attrs = ["" for c in columns]
|
||||
attrs[7] = None
|
||||
|
||||
text = tableStart('Test Times')
|
||||
text = tableStart("Test Times")
|
||||
text += tableHeader(columns, attrs)
|
||||
|
||||
allowed_average_run_time = 3.75 # 60 seconds per test at (7 + 1) * 2 runs
|
||||
allowed_average_run_time = 3.75 # 60 seconds per test at (7 + 1) * 2 runs
|
||||
for r in rows:
|
||||
anchor = f'{currentTableAnchor()}.{r[0]}'
|
||||
anchor = f"{currentTableAnchor()}.{r[0]}"
|
||||
total_runs = (int(r[7]) + 1) * 2 # one prewarm run, two servers
|
||||
if r[0] != 'Total' and float(r[5]) > allowed_average_run_time * total_runs:
|
||||
if r[0] != "Total" and float(r[5]) > allowed_average_run_time * total_runs:
|
||||
# FIXME should be 15s max -- investigate parallel_insert
|
||||
slow_average_tests += 1
|
||||
attrs[5] = f'style="background: {color_bad}"'
|
||||
errors_explained.append([f'<a href="#{anchor}">The test \'{r[0]}\' is too slow to run as a whole. Investigate whether the create and fill queries can be sped up'])
|
||||
errors_explained.append(
|
||||
[
|
||||
f"<a href=\"#{anchor}\">The test '{r[0]}' is too slow to run as a whole. Investigate whether the create and fill queries can be sped up"
|
||||
]
|
||||
)
|
||||
else:
|
||||
attrs[5] = ''
|
||||
attrs[5] = ""
|
||||
|
||||
if r[0] != 'Total' and float(r[4]) > allowed_single_run_time * total_runs:
|
||||
if r[0] != "Total" and float(r[4]) > allowed_single_run_time * total_runs:
|
||||
slow_average_tests += 1
|
||||
attrs[4] = f'style="background: {color_bad}"'
|
||||
errors_explained.append([f'<a href="./all-queries.html#all-query-times.{r[0]}.0">Some query of the test \'{r[0]}\' is too slow to run. See the all queries report'])
|
||||
errors_explained.append(
|
||||
[
|
||||
f"<a href=\"./all-queries.html#all-query-times.{r[0]}.0\">Some query of the test '{r[0]}' is too slow to run. See the all queries report"
|
||||
]
|
||||
)
|
||||
else:
|
||||
attrs[4] = ''
|
||||
attrs[4] = ""
|
||||
|
||||
text += tableRow(r, attrs, anchor)
|
||||
|
||||
@ -525,10 +611,17 @@ if args.report == 'main':
|
||||
|
||||
add_test_times()
|
||||
|
||||
addSimpleTable('Metric Changes',
|
||||
['Metric', 'Old median value', 'New median value',
|
||||
'Relative difference', 'Times difference'],
|
||||
tsvRows('metrics/changes.tsv'))
|
||||
addSimpleTable(
|
||||
"Metric Changes",
|
||||
[
|
||||
"Metric",
|
||||
"Old median value",
|
||||
"New median value",
|
||||
"Relative difference",
|
||||
"Times difference",
|
||||
],
|
||||
tsvRows("metrics/changes.tsv"),
|
||||
)
|
||||
|
||||
add_report_errors()
|
||||
add_errors_explained()
|
||||
@ -536,7 +629,8 @@ if args.report == 'main':
|
||||
for t in tables:
|
||||
print(t)
|
||||
|
||||
print(f"""
|
||||
print(
|
||||
f"""
|
||||
</div>
|
||||
<p class="links">
|
||||
<a href="all-queries.html">All queries</a>
|
||||
@ -546,104 +640,111 @@ if args.report == 'main':
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
||||
""")
|
||||
"""
|
||||
)
|
||||
|
||||
status = 'success'
message = 'See the report'
status = "success"
message = "See the report"
message_array = []

if slow_average_tests:
status = 'failure'
message_array.append(str(slow_average_tests) + ' too long')
status = "failure"
message_array.append(str(slow_average_tests) + " too long")

if faster_queries:
message_array.append(str(faster_queries) + ' faster')
message_array.append(str(faster_queries) + " faster")

if slower_queries:
if slower_queries > 3:
status = 'failure'
message_array.append(str(slower_queries) + ' slower')
status = "failure"
message_array.append(str(slower_queries) + " slower")

if unstable_partial_queries:
very_unstable_queries += unstable_partial_queries
status = 'failure'
status = "failure"

# Don't show mildly unstable queries, only the very unstable ones we
# treat as errors.
if very_unstable_queries:
if very_unstable_queries > 5:
error_tests += very_unstable_queries
status = 'failure'
message_array.append(str(very_unstable_queries) + ' unstable')
status = "failure"
message_array.append(str(very_unstable_queries) + " unstable")

error_tests += slow_average_tests
if error_tests:
status = 'failure'
message_array.insert(0, str(error_tests) + ' errors')
status = "failure"
message_array.insert(0, str(error_tests) + " errors")

if message_array:
message = ', '.join(message_array)
message = ", ".join(message_array)

if report_errors:
status = 'failure'
message = 'Errors while building the report.'
status = "failure"
message = "Errors while building the report."

print(("""
print(
(
"""
<!--status: {status}-->
<!--message: {message}-->
""".format(status=status, message=message)))
""".format(
status=status, message=message
)
)
)

elif args.report == 'all-queries':
|
||||
elif args.report == "all-queries":
|
||||
|
||||
print((header_template.format()))
|
||||
|
||||
add_tested_commits()
|
||||
|
||||
def add_all_queries():
|
||||
rows = tsvRows('report/all-queries.tsv')
|
||||
rows = tsvRows("report/all-queries.tsv")
|
||||
if not rows:
|
||||
return
|
||||
|
||||
columns = [
|
||||
'', # Changed #0
|
||||
'', # Unstable #1
|
||||
'Old, s', #2
|
||||
'New, s', #3
|
||||
'Ratio of speedup (-) or slowdown (+)', #4
|
||||
'Relative difference (new − old) / old', #5
|
||||
'p < 0.01 threshold', #6
|
||||
'Test', #7
|
||||
'#', #8
|
||||
'Query', #9
|
||||
]
|
||||
attrs = ['' for c in columns]
|
||||
"", # Changed #0
|
||||
"", # Unstable #1
|
||||
"Old, s", # 2
|
||||
"New, s", # 3
|
||||
"Ratio of speedup (-) or slowdown (+)", # 4
|
||||
"Relative difference (new − old) / old", # 5
|
||||
"p < 0.01 threshold", # 6
|
||||
"Test", # 7
|
||||
"#", # 8
|
||||
"Query", # 9
|
||||
]
|
||||
attrs = ["" for c in columns]
|
||||
attrs[0] = None
|
||||
attrs[1] = None
|
||||
|
||||
text = tableStart('All Query Times')
|
||||
text = tableStart("All Query Times")
|
||||
text += tableHeader(columns, attrs)
|
||||
|
||||
for r in rows:
|
||||
anchor = f'{currentTableAnchor()}.{r[7]}.{r[8]}'
|
||||
anchor = f"{currentTableAnchor()}.{r[7]}.{r[8]}"
|
||||
if int(r[1]):
|
||||
attrs[6] = f'style="background: {color_bad}"'
|
||||
else:
|
||||
attrs[6] = ''
|
||||
attrs[6] = ""
|
||||
|
||||
if int(r[0]):
|
||||
if float(r[5]) > 0.:
|
||||
if float(r[5]) > 0.0:
|
||||
attrs[4] = attrs[5] = f'style="background: {color_bad}"'
|
||||
else:
|
||||
attrs[4] = attrs[5] = f'style="background: {color_good}"'
|
||||
else:
|
||||
attrs[4] = attrs[5] = ''
|
||||
attrs[4] = attrs[5] = ""
|
||||
|
||||
if (float(r[2]) + float(r[3])) / 2 > allowed_single_run_time:
|
||||
attrs[2] = f'style="background: {color_bad}"'
|
||||
attrs[3] = f'style="background: {color_bad}"'
|
||||
else:
|
||||
attrs[2] = ''
|
||||
attrs[3] = ''
|
||||
attrs[2] = ""
|
||||
attrs[3] = ""
|
||||
|
||||
text += tableRow(r, attrs, anchor)
|
||||
|
||||
@ -655,7 +756,8 @@ elif args.report == 'all-queries':
|
||||
for t in tables:
|
||||
print(t)
|
||||
|
||||
print(f"""
|
||||
print(
|
||||
f"""
|
||||
</div>
|
||||
<p class="links">
|
||||
<a href="report.html">Main report</a>
|
||||
@ -665,4 +767,5 @@ elif args.report == 'all-queries':
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
||||
""")
|
||||
"""
|
||||
)
@ -7,18 +7,19 @@ import csv
|
||||
|
||||
RESULT_LOG_NAME = "run.log"
|
||||
|
||||
|
||||
def process_result(result_folder):
|
||||
|
||||
status = "success"
|
||||
description = 'Server started and responded'
|
||||
description = "Server started and responded"
|
||||
summary = [("Smoke test", "OK")]
|
||||
with open(os.path.join(result_folder, RESULT_LOG_NAME), 'r') as run_log:
|
||||
lines = run_log.read().split('\n')
|
||||
if not lines or lines[0].strip() != 'OK':
|
||||
with open(os.path.join(result_folder, RESULT_LOG_NAME), "r") as run_log:
|
||||
lines = run_log.read().split("\n")
|
||||
if not lines or lines[0].strip() != "OK":
|
||||
status = "failure"
|
||||
logging.info("Lines is not ok: %s", str('\n'.join(lines)))
|
||||
logging.info("Lines is not ok: %s", str("\n".join(lines)))
|
||||
summary = [("Smoke test", "FAIL")]
|
||||
description = 'Server failed to respond, see result in logs'
|
||||
description = "Server failed to respond, see result in logs"
|
||||
|
||||
result_logs = []
|
||||
server_log_path = os.path.join(result_folder, "clickhouse-server.log")
|
||||
@ -38,20 +39,22 @@ def process_result(result_folder):
|
||||
|
||||
|
||||
def write_results(results_file, status_file, results, status):
|
||||
with open(results_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(results_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerows(results)
|
||||
with open(status_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(status_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerow(status)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
|
||||
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of split build smoke test")
|
||||
parser.add_argument("--in-results-dir", default='/test_output/')
|
||||
parser.add_argument("--out-results-file", default='/test_output/test_results.tsv')
|
||||
parser.add_argument("--out-status-file", default='/test_output/check_status.tsv')
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
|
||||
parser = argparse.ArgumentParser(
|
||||
description="ClickHouse script for parsing results of split build smoke test"
|
||||
)
|
||||
parser.add_argument("--in-results-dir", default="/test_output/")
|
||||
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
|
||||
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
|
||||
args = parser.parse_args()
|
||||
|
||||
state, description, test_results, logs = process_result(args.in_results_dir)
|
||||
|
@ -10,11 +10,18 @@ def process_result(result_folder):
|
||||
status = "success"
|
||||
summary = []
|
||||
paths = []
|
||||
tests = ["TLPWhere", "TLPGroupBy", "TLPHaving", "TLPWhereGroupBy", "TLPDistinct", "TLPAggregate"]
|
||||
tests = [
|
||||
"TLPWhere",
|
||||
"TLPGroupBy",
|
||||
"TLPHaving",
|
||||
"TLPWhereGroupBy",
|
||||
"TLPDistinct",
|
||||
"TLPAggregate",
|
||||
]
|
||||
|
||||
for test in tests:
|
||||
err_path = '{}/{}.err'.format(result_folder, test)
|
||||
out_path = '{}/{}.out'.format(result_folder, test)
|
||||
err_path = "{}/{}.err".format(result_folder, test)
|
||||
out_path = "{}/{}.out".format(result_folder, test)
|
||||
if not os.path.exists(err_path):
|
||||
logging.info("No output err on path %s", err_path)
|
||||
summary.append((test, "SKIPPED"))
|
||||
@ -23,24 +30,24 @@ def process_result(result_folder):
|
||||
else:
|
||||
paths.append(err_path)
|
||||
paths.append(out_path)
|
||||
with open(err_path, 'r') as f:
|
||||
if 'AssertionError' in f.read():
|
||||
with open(err_path, "r") as f:
|
||||
if "AssertionError" in f.read():
|
||||
summary.append((test, "FAIL"))
|
||||
status = 'failure'
|
||||
status = "failure"
|
||||
else:
|
||||
summary.append((test, "OK"))
|
||||
|
||||
logs_path = '{}/logs.tar.gz'.format(result_folder)
|
||||
logs_path = "{}/logs.tar.gz".format(result_folder)
|
||||
if not os.path.exists(logs_path):
|
||||
logging.info("No logs tar on path %s", logs_path)
|
||||
else:
|
||||
paths.append(logs_path)
|
||||
stdout_path = '{}/stdout.log'.format(result_folder)
|
||||
stdout_path = "{}/stdout.log".format(result_folder)
|
||||
if not os.path.exists(stdout_path):
|
||||
logging.info("No stdout log on path %s", stdout_path)
|
||||
else:
|
||||
paths.append(stdout_path)
|
||||
stderr_path = '{}/stderr.log'.format(result_folder)
|
||||
stderr_path = "{}/stderr.log".format(result_folder)
|
||||
if not os.path.exists(stderr_path):
|
||||
logging.info("No stderr log on path %s", stderr_path)
|
||||
else:
|
||||
@ -52,20 +59,22 @@ def process_result(result_folder):
|
||||
|
||||
|
||||
def write_results(results_file, status_file, results, status):
|
||||
with open(results_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(results_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerows(results)
|
||||
with open(status_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(status_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerow(status)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
|
||||
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of sqlancer test")
|
||||
parser.add_argument("--in-results-dir", default='/test_output/')
|
||||
parser.add_argument("--out-results-file", default='/test_output/test_results.tsv')
|
||||
parser.add_argument("--out-status-file", default='/test_output/check_status.tsv')
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
|
||||
parser = argparse.ArgumentParser(
|
||||
description="ClickHouse script for parsing results of sqlancer test"
|
||||
)
|
||||
parser.add_argument("--in-results-dir", default="/test_output/")
|
||||
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
|
||||
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
|
||||
args = parser.parse_args()
|
||||
|
||||
state, description, test_results, logs = process_result(args.in_results_dir)
|
||||
|
@ -16,7 +16,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
python3-pip \
shellcheck \
yamllint \
&& pip3 install codespell PyGithub boto3 unidiff dohq-artifactory
&& pip3 install black boto3 codespell dohq-artifactory PyGithub unidiff

# Architecture of the image when BuildKit/buildx is used
ARG TARGETARCH

@ -14,6 +14,7 @@ def process_result(result_folder):
("header duplicates", "duplicate_output.txt"),
("shellcheck", "shellcheck_output.txt"),
("style", "style_output.txt"),
("black", "black_output.txt"),
("typos", "typos_output.txt"),
("whitespaces", "whitespaces_output.txt"),
("workflows", "workflows_output.txt"),

@ -7,6 +7,8 @@ echo "Check duplicates" | ts
./check-duplicate-includes.sh |& tee /test_output/duplicate_output.txt
echo "Check style" | ts
./check-style -n |& tee /test_output/style_output.txt
echo "Check python formatting with black" | ts
./check-black -n |& tee /test_output/black_output.txt
echo "Check typos" | ts
./check-typos |& tee /test_output/typos_output.txt
echo "Check whitespaces" | ts

@ -22,9 +22,9 @@ def process_result(result_folder):
|
||||
total_other = 0
|
||||
test_results = []
|
||||
for test in results["tests"]:
|
||||
test_name = test['test']['test_name']
|
||||
test_result = test['result']['result_type'].upper()
|
||||
test_time = str(test['result']['message_rtime'])
|
||||
test_name = test["test"]["test_name"]
|
||||
test_result = test["result"]["result_type"].upper()
|
||||
test_time = str(test["result"]["message_rtime"])
|
||||
total_tests += 1
|
||||
if test_result == "OK":
|
||||
total_ok += 1
|
||||
@ -39,24 +39,29 @@ def process_result(result_folder):
|
||||
else:
|
||||
status = "success"
|
||||
|
||||
description = "failed: {}, passed: {}, other: {}".format(total_fail, total_ok, total_other)
|
||||
description = "failed: {}, passed: {}, other: {}".format(
|
||||
total_fail, total_ok, total_other
|
||||
)
|
||||
return status, description, test_results, [json_path, test_binary_log]
|
||||
|
||||
|
||||
def write_results(results_file, status_file, results, status):
|
||||
with open(results_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(results_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerows(results)
|
||||
with open(status_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(status_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerow(status)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
|
||||
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of Testflows tests")
|
||||
parser.add_argument("--in-results-dir", default='./')
|
||||
parser.add_argument("--out-results-file", default='./test_results.tsv')
|
||||
parser.add_argument("--out-status-file", default='./check_status.tsv')
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
|
||||
parser = argparse.ArgumentParser(
|
||||
description="ClickHouse script for parsing results of Testflows tests"
|
||||
)
|
||||
parser.add_argument("--in-results-dir", default="./")
|
||||
parser.add_argument("--out-results-file", default="./test_results.tsv")
|
||||
parser.add_argument("--out-status-file", default="./check_status.tsv")
|
||||
args = parser.parse_args()
|
||||
|
||||
state, description, test_results, logs = process_result(args.in_results_dir)
|
||||
@ -64,4 +69,3 @@ if __name__ == "__main__":
|
||||
status = (state, description)
|
||||
write_results(args.out_results_file, args.out_status_file, test_results, status)
|
||||
logging.info("Result written")
|
||||
|
||||
|
@ -5,24 +5,26 @@ import logging
|
||||
import argparse
|
||||
import csv
|
||||
|
||||
OK_SIGN = 'OK ]'
|
||||
FAILED_SIGN = 'FAILED ]'
|
||||
SEGFAULT = 'Segmentation fault'
|
||||
SIGNAL = 'received signal SIG'
|
||||
PASSED = 'PASSED'
|
||||
OK_SIGN = "OK ]"
|
||||
FAILED_SIGN = "FAILED ]"
|
||||
SEGFAULT = "Segmentation fault"
|
||||
SIGNAL = "received signal SIG"
|
||||
PASSED = "PASSED"
|
||||
|
||||
|
||||
def get_test_name(line):
|
||||
elements = reversed(line.split(' '))
|
||||
elements = reversed(line.split(" "))
|
||||
for element in elements:
|
||||
if '(' not in element and ')' not in element:
|
||||
if "(" not in element and ")" not in element:
|
||||
return element
|
||||
raise Exception("No test name in line '{}'".format(line))
|
||||
|
||||
|
||||
def process_result(result_folder):
|
||||
summary = []
|
||||
total_counter = 0
|
||||
failed_counter = 0
|
||||
result_log_path = '{}/test_result.txt'.format(result_folder)
|
||||
result_log_path = "{}/test_result.txt".format(result_folder)
|
||||
if not os.path.exists(result_log_path):
|
||||
logging.info("No output log on path %s", result_log_path)
|
||||
return "exception", "No output log", []
|
||||
@ -30,7 +32,7 @@ def process_result(result_folder):
|
||||
status = "success"
|
||||
description = ""
|
||||
passed = False
|
||||
with open(result_log_path, 'r') as test_result:
|
||||
with open(result_log_path, "r") as test_result:
|
||||
for line in test_result:
|
||||
if OK_SIGN in line:
|
||||
logging.info("Found ok line: '%s'", line)
|
||||
@ -38,7 +40,7 @@ def process_result(result_folder):
|
||||
logging.info("Test name: '%s'", test_name)
|
||||
summary.append((test_name, "OK"))
|
||||
total_counter += 1
|
||||
elif FAILED_SIGN in line and 'listed below' not in line and 'ms)' in line:
|
||||
elif FAILED_SIGN in line and "listed below" not in line and "ms)" in line:
|
||||
logging.info("Found fail line: '%s'", line)
|
||||
test_name = get_test_name(line.strip())
|
||||
logging.info("Test name: '%s'", test_name)
|
||||
@ -67,25 +69,30 @@ def process_result(result_folder):
|
||||
status = "failure"
|
||||
|
||||
if not description:
|
||||
description += "fail: {}, passed: {}".format(failed_counter, total_counter - failed_counter)
|
||||
description += "fail: {}, passed: {}".format(
|
||||
failed_counter, total_counter - failed_counter
|
||||
)
|
||||
|
||||
return status, description, summary
|
||||
|
||||
|
||||
def write_results(results_file, status_file, results, status):
|
||||
with open(results_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(results_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerows(results)
|
||||
with open(status_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(status_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerow(status)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
|
||||
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of unit tests")
|
||||
parser.add_argument("--in-results-dir", default='/test_output/')
|
||||
parser.add_argument("--out-results-file", default='/test_output/test_results.tsv')
|
||||
parser.add_argument("--out-status-file", default='/test_output/check_status.tsv')
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
|
||||
parser = argparse.ArgumentParser(
|
||||
description="ClickHouse script for parsing results of unit tests"
|
||||
)
|
||||
parser.add_argument("--in-results-dir", default="/test_output/")
|
||||
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
|
||||
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
|
||||
args = parser.parse_args()
|
||||
|
||||
state, description, test_results = process_result(args.in_results_dir)
|
||||
@ -93,4 +100,3 @@ if __name__ == "__main__":
|
||||
status = (state, description)
|
||||
write_results(args.out_results_file, args.out_status_file, test_results, status)
|
||||
logging.info("Result written")
|
||||
|
||||
|
@ -16,6 +16,7 @@ NO_TASK_TIMEOUT_SIGNS = ["All tests have finished", "No tests were run"]
|
||||
|
||||
RETRIES_SIGN = "Some tests were restarted"
|
||||
|
||||
|
||||
def process_test_log(log_path):
|
||||
total = 0
|
||||
skipped = 0
|
||||
@ -26,7 +27,7 @@ def process_test_log(log_path):
|
||||
retries = False
|
||||
task_timeout = True
|
||||
test_results = []
|
||||
with open(log_path, 'r') as test_file:
|
||||
with open(log_path, "r") as test_file:
|
||||
for line in test_file:
|
||||
original_line = line
|
||||
line = line.strip()
|
||||
@ -36,12 +37,15 @@ def process_test_log(log_path):
|
||||
hung = True
|
||||
if RETRIES_SIGN in line:
|
||||
retries = True
|
||||
if any(sign in line for sign in (OK_SIGN, FAIL_SIGN, UNKNOWN_SIGN, SKIPPED_SIGN)):
|
||||
test_name = line.split(' ')[2].split(':')[0]
|
||||
if any(
|
||||
sign in line
|
||||
for sign in (OK_SIGN, FAIL_SIGN, UNKNOWN_SIGN, SKIPPED_SIGN)
|
||||
):
|
||||
test_name = line.split(" ")[2].split(":")[0]
|
||||
|
||||
test_time = ''
|
||||
test_time = ""
|
||||
try:
|
||||
time_token = line.split(']')[1].strip().split()[0]
|
||||
time_token = line.split("]")[1].strip().split()[0]
|
||||
float(time_token)
|
||||
test_time = time_token
|
||||
except:
|
||||
@ -66,9 +70,22 @@ def process_test_log(log_path):
|
||||
elif len(test_results) > 0 and test_results[-1][1] == "FAIL":
|
||||
test_results[-1][3].append(original_line)
|
||||
|
||||
test_results = [(test[0], test[1], test[2], ''.join(test[3])) for test in test_results]
|
||||
test_results = [
|
||||
(test[0], test[1], test[2], "".join(test[3])) for test in test_results
|
||||
]
|
||||
|
||||
return (
|
||||
total,
|
||||
skipped,
|
||||
unknown,
|
||||
failed,
|
||||
success,
|
||||
hung,
|
||||
task_timeout,
|
||||
retries,
|
||||
test_results,
|
||||
)
|
||||
|
||||
return total, skipped, unknown, failed, success, hung, task_timeout, retries, test_results
|
||||
|
||||
def process_result(result_path):
|
||||
test_results = []
|
||||
@ -76,16 +93,26 @@ def process_result(result_path):
|
||||
description = ""
|
||||
files = os.listdir(result_path)
|
||||
if files:
|
||||
logging.info("Find files in result folder %s", ','.join(files))
|
||||
result_path = os.path.join(result_path, 'test_result.txt')
|
||||
logging.info("Find files in result folder %s", ",".join(files))
|
||||
result_path = os.path.join(result_path, "test_result.txt")
|
||||
else:
|
||||
result_path = None
|
||||
description = "No output log"
|
||||
state = "error"
|
||||
|
||||
if result_path and os.path.exists(result_path):
|
||||
total, skipped, unknown, failed, success, hung, task_timeout, retries, test_results = process_test_log(result_path)
|
||||
is_flacky_check = 1 < int(os.environ.get('NUM_TRIES', 1))
|
||||
(
|
||||
total,
|
||||
skipped,
|
||||
unknown,
|
||||
failed,
|
||||
success,
|
||||
hung,
|
||||
task_timeout,
|
||||
retries,
|
||||
test_results,
|
||||
) = process_test_log(result_path)
|
||||
is_flacky_check = 1 < int(os.environ.get("NUM_TRIES", 1))
|
||||
logging.info("Is flacky check: %s", is_flacky_check)
|
||||
# If no tests were run (success == 0) it indicates an error (e.g. server did not start or crashed immediately)
|
||||
# But it's Ok for "flaky checks" - they can contain just one test for check which is marked as skipped.
|
||||
@ -120,20 +147,22 @@ def process_result(result_path):
|
||||
|
||||
|
||||
def write_results(results_file, status_file, results, status):
|
||||
with open(results_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(results_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerows(results)
|
||||
with open(status_file, 'w') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(status_file, "w") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerow(status)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
|
||||
parser = argparse.ArgumentParser(description="ClickHouse script for parsing results of functional tests")
|
||||
parser.add_argument("--in-results-dir", default='/test_output/')
|
||||
parser.add_argument("--out-results-file", default='/test_output/test_results.tsv')
|
||||
parser.add_argument("--out-status-file", default='/test_output/check_status.tsv')
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
|
||||
parser = argparse.ArgumentParser(
|
||||
description="ClickHouse script for parsing results of functional tests"
|
||||
)
|
||||
parser.add_argument("--in-results-dir", default="/test_output/")
|
||||
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
|
||||
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
|
||||
args = parser.parse_args()
|
||||
|
||||
state, description, test_results = process_result(args.in_results_dir)
@ -71,6 +71,8 @@ This check means that the CI system started to process the pull request. When it
Performs some simple regex-based checks of code style, using the [`utils/check-style/check-style`](https://github.com/ClickHouse/ClickHouse/blob/master/utils/check-style/check-style) binary (note that it can be run locally).
If it fails, fix the style errors following the [code style guide](style.md).

Python code is checked with [black](https://github.com/psf/black/).
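
The formatting check can be reproduced locally before pushing. A minimal sketch of such a check (not the exact `check-black` wrapper that the CI image runs, which is shown in the diff above) might look like this; it assumes `black` is installed, e.g. via `pip3 install black`, and is run from the repository root:

```python
# Hypothetical local helper: run black in check-only mode over the repo,
# mirroring what the CI "black" status reports (assumes `pip3 install black`).
import subprocess
import sys


def check_black(path: str = ".") -> int:
    # --check reports files that would be reformatted without changing them;
    # --diff prints the would-be changes so they are easy to fix.
    result = subprocess.run(["black", "--check", "--diff", path])
    if result.returncode != 0:
        print("black found files that need reformatting", file=sys.stderr)
    return result.returncode


if __name__ == "__main__":
    sys.exit(check_black())
```

Running `black <file>` (without `--check`) rewrites the file in place, which is usually the quickest way to fix a failing check.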
### Report Details
- [Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check.html)
- `output.txt` contains the check resulting errors (invalid tabulation etc), blank page means no errors. [Successful result example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check/output.txt).

@ -15,24 +15,24 @@ import website
|
||||
|
||||
def prepare_amp_html(lang, args, root, site_temp, main_site_dir):
|
||||
src_path = root
|
||||
src_index = os.path.join(src_path, 'index.html')
|
||||
src_index = os.path.join(src_path, "index.html")
|
||||
rel_path = os.path.relpath(src_path, site_temp)
|
||||
dst_path = os.path.join(main_site_dir, rel_path, 'amp')
|
||||
dst_index = os.path.join(dst_path, 'index.html')
|
||||
dst_path = os.path.join(main_site_dir, rel_path, "amp")
|
||||
dst_index = os.path.join(dst_path, "index.html")
|
||||
|
||||
logging.debug(f'Generating AMP version for {rel_path} ({lang})')
|
||||
logging.debug(f"Generating AMP version for {rel_path} ({lang})")
|
||||
os.makedirs(dst_path)
|
||||
with open(src_index, 'r') as f:
|
||||
with open(src_index, "r") as f:
|
||||
content = f.read()
|
||||
css_in = ' '.join(website.get_css_in(args))
|
||||
css_in = " ".join(website.get_css_in(args))
|
||||
command = f"purifycss --min {css_in} '{src_index}'"
|
||||
logging.debug(command)
|
||||
inline_css = subprocess.check_output(command, shell=True).decode('utf-8')
|
||||
inline_css = inline_css.replace('!important', '').replace('/*!', '/*')
|
||||
inline_css = subprocess.check_output(command, shell=True).decode("utf-8")
|
||||
inline_css = inline_css.replace("!important", "").replace("/*!", "/*")
|
||||
inline_css = cssmin.cssmin(inline_css)
|
||||
content = content.replace('CUSTOM_CSS_PLACEHOLDER', inline_css)
|
||||
content = content.replace("CUSTOM_CSS_PLACEHOLDER", inline_css)
|
||||
|
||||
with open(dst_index, 'w') as f:
|
||||
with open(dst_index, "w") as f:
|
||||
f.write(content)
|
||||
|
||||
return dst_index
|
||||
@ -40,15 +40,12 @@ def prepare_amp_html(lang, args, root, site_temp, main_site_dir):
|
||||
|
||||
def build_amp(lang, args, cfg):
|
||||
# AMP docs: https://amp.dev/documentation/
|
||||
logging.info(f'Building AMP version for {lang}')
|
||||
logging.info(f"Building AMP version for {lang}")
|
||||
with util.temp_dir() as site_temp:
|
||||
extra = cfg.data['extra']
|
||||
main_site_dir = cfg.data['site_dir']
|
||||
extra['is_amp'] = True
|
||||
cfg.load_dict({
|
||||
'site_dir': site_temp,
|
||||
'extra': extra
|
||||
})
|
||||
extra = cfg.data["extra"]
|
||||
main_site_dir = cfg.data["site_dir"]
|
||||
extra["is_amp"] = True
|
||||
cfg.load_dict({"site_dir": site_temp, "extra": extra})
|
||||
|
||||
try:
|
||||
mkdocs.commands.build.build(cfg)
|
||||
@ -60,50 +57,49 @@ def build_amp(lang, args, cfg):
|
||||
|
||||
paths = []
|
||||
for root, _, filenames in os.walk(site_temp):
|
||||
if 'index.html' in filenames:
|
||||
paths.append(prepare_amp_html(lang, args, root, site_temp, main_site_dir))
|
||||
logging.info(f'Finished building AMP version for {lang}')
|
||||
if "index.html" in filenames:
|
||||
paths.append(
|
||||
prepare_amp_html(lang, args, root, site_temp, main_site_dir)
|
||||
)
|
||||
logging.info(f"Finished building AMP version for {lang}")
|
||||
|
||||
|
||||
def html_to_amp(content):
|
||||
soup = bs4.BeautifulSoup(
|
||||
content,
|
||||
features='html.parser'
|
||||
)
|
||||
soup = bs4.BeautifulSoup(content, features="html.parser")
|
||||
|
||||
for tag in soup.find_all():
|
||||
if tag.attrs.get('id') == 'tostring':
|
||||
tag.attrs['id'] = '_tostring'
|
||||
if tag.name == 'img':
|
||||
tag.name = 'amp-img'
|
||||
tag.attrs['layout'] = 'responsive'
|
||||
src = tag.attrs['src']
|
||||
if not (src.startswith('/') or src.startswith('http')):
|
||||
tag.attrs['src'] = f'../{src}'
|
||||
if not tag.attrs.get('width'):
|
||||
tag.attrs['width'] = '640'
|
||||
if not tag.attrs.get('height'):
|
||||
tag.attrs['height'] = '320'
|
||||
if tag.name == 'iframe':
|
||||
tag.name = 'amp-iframe'
|
||||
tag.attrs['layout'] = 'responsive'
|
||||
del tag.attrs['alt']
|
||||
del tag.attrs['allowfullscreen']
|
||||
if not tag.attrs.get('width'):
|
||||
tag.attrs['width'] = '640'
|
||||
if not tag.attrs.get('height'):
|
||||
tag.attrs['height'] = '320'
|
||||
elif tag.name == 'a':
|
||||
href = tag.attrs.get('href')
|
||||
if tag.attrs.get("id") == "tostring":
|
||||
tag.attrs["id"] = "_tostring"
|
||||
if tag.name == "img":
|
||||
tag.name = "amp-img"
|
||||
tag.attrs["layout"] = "responsive"
|
||||
src = tag.attrs["src"]
|
||||
if not (src.startswith("/") or src.startswith("http")):
|
||||
tag.attrs["src"] = f"../{src}"
|
||||
if not tag.attrs.get("width"):
|
||||
tag.attrs["width"] = "640"
|
||||
if not tag.attrs.get("height"):
|
||||
tag.attrs["height"] = "320"
|
||||
if tag.name == "iframe":
|
||||
tag.name = "amp-iframe"
|
||||
tag.attrs["layout"] = "responsive"
|
||||
del tag.attrs["alt"]
|
||||
del tag.attrs["allowfullscreen"]
|
||||
if not tag.attrs.get("width"):
|
||||
tag.attrs["width"] = "640"
|
||||
if not tag.attrs.get("height"):
|
||||
tag.attrs["height"] = "320"
|
||||
elif tag.name == "a":
|
||||
href = tag.attrs.get("href")
|
||||
if href:
|
||||
if not (href.startswith('/') or href.startswith('http')):
|
||||
if '#' in href:
|
||||
href, anchor = href.split('#')
|
||||
if not (href.startswith("/") or href.startswith("http")):
|
||||
if "#" in href:
|
||||
href, anchor = href.split("#")
|
||||
else:
|
||||
anchor = None
|
||||
href = f'../{href}amp/'
|
||||
href = f"../{href}amp/"
|
||||
if anchor:
|
||||
href = f'{href}#{anchor}'
|
||||
tag.attrs['href'] = href
|
||||
href = f"{href}#{anchor}"
|
||||
tag.attrs["href"] = href
|
||||
content = str(soup)
|
||||
return website.minify_html(content)
|
||||
|
@ -17,54 +17,52 @@ import util
|
||||
|
||||
|
||||
def build_for_lang(lang, args):
|
||||
logging.info(f'Building {lang} blog')
|
||||
logging.info(f"Building {lang} blog")
|
||||
|
||||
try:
|
||||
theme_cfg = {
|
||||
'name': None,
|
||||
'custom_dir': os.path.join(os.path.dirname(__file__), '..', args.theme_dir),
|
||||
'language': lang,
|
||||
'direction': 'ltr',
|
||||
'static_templates': ['404.html'],
|
||||
'extra': {
|
||||
'now': int(time.mktime(datetime.datetime.now().timetuple())) # TODO better way to avoid caching
|
||||
}
|
||||
"name": None,
|
||||
"custom_dir": os.path.join(os.path.dirname(__file__), "..", args.theme_dir),
|
||||
"language": lang,
|
||||
"direction": "ltr",
|
||||
"static_templates": ["404.html"],
|
||||
"extra": {
|
||||
"now": int(
|
||||
time.mktime(datetime.datetime.now().timetuple())
|
||||
) # TODO better way to avoid caching
|
||||
},
|
||||
}
|
||||
|
||||
# the following list of languages is sorted according to
|
||||
# https://en.wikipedia.org/wiki/List_of_languages_by_total_number_of_speakers
|
||||
languages = {
|
||||
'en': 'English'
|
||||
}
|
||||
languages = {"en": "English"}
|
||||
|
||||
site_names = {
|
||||
'en': 'ClickHouse Blog'
|
||||
}
|
||||
site_names = {"en": "ClickHouse Blog"}
|
||||
|
||||
assert len(site_names) == len(languages)
|
||||
|
||||
site_dir = os.path.join(args.blog_output_dir, lang)
|
||||
|
||||
plugins = ['macros']
|
||||
plugins = ["macros"]
|
||||
if args.htmlproofer:
|
||||
plugins.append('htmlproofer')
|
||||
plugins.append("htmlproofer")
|
||||
|
||||
website_url = 'https://clickhouse.com'
|
||||
site_name = site_names.get(lang, site_names['en'])
|
||||
website_url = "https://clickhouse.com"
|
||||
site_name = site_names.get(lang, site_names["en"])
|
||||
blog_nav, post_meta = nav.build_blog_nav(lang, args)
|
||||
raw_config = dict(
|
||||
site_name=site_name,
|
||||
site_url=f'{website_url}/blog/{lang}/',
|
||||
site_url=f"{website_url}/blog/{lang}/",
|
||||
docs_dir=os.path.join(args.blog_dir, lang),
|
||||
site_dir=site_dir,
|
||||
strict=True,
|
||||
theme=theme_cfg,
|
||||
nav=blog_nav,
|
||||
copyright='©2016–2022 ClickHouse, Inc.',
|
||||
copyright="©2016–2022 ClickHouse, Inc.",
|
||||
use_directory_urls=True,
|
||||
repo_name='ClickHouse/ClickHouse',
|
||||
repo_url='https://github.com/ClickHouse/ClickHouse/',
|
||||
edit_uri=f'edit/master/website/blog/{lang}',
|
||||
repo_name="ClickHouse/ClickHouse",
|
||||
repo_url="https://github.com/ClickHouse/ClickHouse/",
|
||||
edit_uri=f"edit/master/website/blog/{lang}",
|
||||
markdown_extensions=mdx_clickhouse.MARKDOWN_EXTENSIONS,
|
||||
plugins=plugins,
|
||||
extra=dict(
|
||||
@ -75,12 +73,12 @@ def build_for_lang(lang, args):
|
||||
website_url=website_url,
|
||||
events=args.events,
|
||||
languages=languages,
|
||||
includes_dir=os.path.join(os.path.dirname(__file__), '..', '_includes'),
|
||||
includes_dir=os.path.join(os.path.dirname(__file__), "..", "_includes"),
|
||||
is_amp=False,
|
||||
is_blog=True,
|
||||
post_meta=post_meta,
|
||||
today=datetime.date.today().isoformat()
|
||||
)
|
||||
today=datetime.date.today().isoformat(),
|
||||
),
|
||||
)
|
||||
|
||||
cfg = config.load_config(**raw_config)
|
||||
@ -89,21 +87,28 @@ def build_for_lang(lang, args):
|
||||
redirects.build_blog_redirects(args)
|
||||
|
||||
env = util.init_jinja2_env(args)
|
||||
with open(os.path.join(args.website_dir, 'templates', 'blog', 'rss.xml'), 'rb') as f:
|
||||
rss_template_string = f.read().decode('utf-8').strip()
|
||||
with open(
|
||||
os.path.join(args.website_dir, "templates", "blog", "rss.xml"), "rb"
|
||||
) as f:
|
||||
rss_template_string = f.read().decode("utf-8").strip()
|
||||
rss_template = env.from_string(rss_template_string)
|
||||
with open(os.path.join(args.blog_output_dir, lang, 'rss.xml'), 'w') as f:
|
||||
f.write(rss_template.render({'config': raw_config}))
|
||||
with open(os.path.join(args.blog_output_dir, lang, "rss.xml"), "w") as f:
|
||||
f.write(rss_template.render({"config": raw_config}))
|
||||
|
||||
logging.info(f'Finished building {lang} blog')
|
||||
logging.info(f"Finished building {lang} blog")
|
||||
|
||||
except exceptions.ConfigurationError as e:
|
||||
raise SystemExit('\n' + str(e))
|
||||
raise SystemExit("\n" + str(e))
|
||||
|
||||
|
||||
def build_blog(args):
|
||||
tasks = []
|
||||
for lang in args.blog_lang.split(','):
|
||||
for lang in args.blog_lang.split(","):
|
||||
if lang:
|
||||
tasks.append((lang, args,))
|
||||
tasks.append(
|
||||
(
|
||||
lang,
|
||||
args,
|
||||
)
|
||||
)
|
||||
util.run_function_in_parallel(build_for_lang, tasks, threads=False)
|
||||
|
@ -30,76 +30,76 @@ import website
|
||||
|
||||
from cmake_in_clickhouse_generator import generate_cmake_flags_files
|
||||
|
||||
|
||||
class ClickHouseMarkdown(markdown.extensions.Extension):
|
||||
class ClickHousePreprocessor(markdown.util.Processor):
|
||||
def run(self, lines):
|
||||
for line in lines:
|
||||
if '<!--hide-->' not in line:
|
||||
if "<!--hide-->" not in line:
|
||||
yield line
|
||||
|
||||
def extendMarkdown(self, md):
|
||||
md.preprocessors.register(self.ClickHousePreprocessor(), 'clickhouse_preprocessor', 31)
|
||||
md.preprocessors.register(
|
||||
self.ClickHousePreprocessor(), "clickhouse_preprocessor", 31
|
||||
)
|
||||
|
||||
|
||||
markdown.extensions.ClickHouseMarkdown = ClickHouseMarkdown
|
||||
|
||||
|
||||
def build_for_lang(lang, args):
|
||||
logging.info(f'Building {lang} docs')
|
||||
os.environ['SINGLE_PAGE'] = '0'
|
||||
logging.info(f"Building {lang} docs")
|
||||
os.environ["SINGLE_PAGE"] = "0"
|
||||
|
||||
try:
|
||||
theme_cfg = {
|
||||
'name': None,
|
||||
'custom_dir': os.path.join(os.path.dirname(__file__), '..', args.theme_dir),
|
||||
'language': lang,
|
||||
'direction': 'rtl' if lang == 'fa' else 'ltr',
|
||||
'static_templates': ['404.html'],
|
||||
'extra': {
|
||||
'now': int(time.mktime(datetime.datetime.now().timetuple())) # TODO better way to avoid caching
|
||||
}
|
||||
"name": None,
|
||||
"custom_dir": os.path.join(os.path.dirname(__file__), "..", args.theme_dir),
|
||||
"language": lang,
|
||||
"direction": "rtl" if lang == "fa" else "ltr",
|
||||
"static_templates": ["404.html"],
|
||||
"extra": {
|
||||
"now": int(
|
||||
time.mktime(datetime.datetime.now().timetuple())
|
||||
) # TODO better way to avoid caching
|
||||
},
|
||||
}
|
||||
|
||||
# the following list of languages is sorted according to
|
||||
# https://en.wikipedia.org/wiki/List_of_languages_by_total_number_of_speakers
|
||||
languages = {
|
||||
'en': 'English',
|
||||
'zh': '中文',
|
||||
'ru': 'Русский',
|
||||
'ja': '日本語'
|
||||
}
|
||||
languages = {"en": "English", "zh": "中文", "ru": "Русский", "ja": "日本語"}
|
||||
|
||||
site_names = {
|
||||
'en': 'ClickHouse %s Documentation',
|
||||
'zh': 'ClickHouse文档 %s',
|
||||
'ru': 'Документация ClickHouse %s',
|
||||
'ja': 'ClickHouseドキュメント %s'
|
||||
"en": "ClickHouse %s Documentation",
|
||||
"zh": "ClickHouse文档 %s",
|
||||
"ru": "Документация ClickHouse %s",
|
||||
"ja": "ClickHouseドキュメント %s",
|
||||
}
|
||||
|
||||
assert len(site_names) == len(languages)
|
||||
|
||||
site_dir = os.path.join(args.docs_output_dir, lang)
|
||||
|
||||
plugins = ['macros']
|
||||
plugins = ["macros"]
|
||||
if args.htmlproofer:
|
||||
plugins.append('htmlproofer')
|
||||
plugins.append("htmlproofer")
|
||||
|
||||
website_url = 'https://clickhouse.com'
|
||||
site_name = site_names.get(lang, site_names['en']) % ''
|
||||
site_name = site_name.replace(' ', ' ')
|
||||
website_url = "https://clickhouse.com"
|
||||
site_name = site_names.get(lang, site_names["en"]) % ""
|
||||
site_name = site_name.replace(" ", " ")
|
||||
|
||||
raw_config = dict(
|
||||
site_name=site_name,
|
||||
site_url=f'{website_url}/docs/{lang}/',
|
||||
site_url=f"{website_url}/docs/{lang}/",
|
||||
docs_dir=os.path.join(args.docs_dir, lang),
|
||||
site_dir=site_dir,
|
||||
strict=True,
|
||||
theme=theme_cfg,
|
||||
copyright='©2016–2022 ClickHouse, Inc.',
|
||||
copyright="©2016–2022 ClickHouse, Inc.",
|
||||
use_directory_urls=True,
|
||||
repo_name='ClickHouse/ClickHouse',
|
||||
repo_url='https://github.com/ClickHouse/ClickHouse/',
|
||||
edit_uri=f'edit/master/docs/{lang}',
|
||||
repo_name="ClickHouse/ClickHouse",
|
||||
repo_url="https://github.com/ClickHouse/ClickHouse/",
|
||||
edit_uri=f"edit/master/docs/{lang}",
|
||||
markdown_extensions=mdx_clickhouse.MARKDOWN_EXTENSIONS,
|
||||
plugins=plugins,
|
||||
extra=dict(
|
||||
@ -111,16 +111,16 @@ def build_for_lang(lang, args):
|
||||
website_url=website_url,
|
||||
events=args.events,
|
||||
languages=languages,
|
||||
includes_dir=os.path.join(os.path.dirname(__file__), '..', '_includes'),
|
||||
includes_dir=os.path.join(os.path.dirname(__file__), "..", "_includes"),
|
||||
is_amp=False,
|
||||
is_blog=False
|
||||
)
|
||||
is_blog=False,
|
||||
),
|
||||
)
|
||||
|
||||
# Clean to be safe if last build finished abnormally
|
||||
single_page.remove_temporary_files(lang, args)
|
||||
|
||||
raw_config['nav'] = nav.build_docs_nav(lang, args)
|
||||
raw_config["nav"] = nav.build_docs_nav(lang, args)
|
||||
|
||||
cfg = config.load_config(**raw_config)
|
||||
|
||||
@ -131,21 +131,28 @@ def build_for_lang(lang, args):
|
||||
amp.build_amp(lang, args, cfg)
|
||||
|
||||
if not args.skip_single_page:
|
||||
single_page.build_single_page_version(lang, args, raw_config.get('nav'), cfg)
|
||||
single_page.build_single_page_version(
|
||||
lang, args, raw_config.get("nav"), cfg
|
||||
)
|
||||
|
||||
mdx_clickhouse.PatchedMacrosPlugin.disabled = False
|
||||
|
||||
logging.info(f'Finished building {lang} docs')
|
||||
logging.info(f"Finished building {lang} docs")
|
||||
|
||||
except exceptions.ConfigurationError as e:
|
||||
raise SystemExit('\n' + str(e))
|
||||
raise SystemExit("\n" + str(e))
|
||||
|
||||
|
||||
def build_docs(args):
|
||||
tasks = []
|
||||
for lang in args.lang.split(','):
|
||||
for lang in args.lang.split(","):
|
||||
if lang:
|
||||
tasks.append((lang, args,))
|
||||
tasks.append(
|
||||
(
|
||||
lang,
|
||||
args,
|
||||
)
|
||||
)
|
||||
util.run_function_in_parallel(build_for_lang, tasks, threads=False)
|
||||
redirects.build_docs_redirects(args)
|
||||
|
||||
@ -171,56 +178,64 @@ def build(args):
|
||||
redirects.build_static_redirects(args)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
os.chdir(os.path.join(os.path.dirname(__file__), '..'))
|
||||
if __name__ == "__main__":
|
||||
os.chdir(os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
# A root path to ClickHouse source code.
|
||||
src_dir = '..'
|
||||
src_dir = ".."
|
||||
|
||||
website_dir = os.path.join(src_dir, 'website')
|
||||
website_dir = os.path.join(src_dir, "website")
|
||||
|
||||
arg_parser = argparse.ArgumentParser()
|
||||
arg_parser.add_argument('--lang', default='en,ru,zh,ja')
|
||||
arg_parser.add_argument('--blog-lang', default='en')
|
||||
arg_parser.add_argument('--docs-dir', default='.')
|
||||
arg_parser.add_argument('--theme-dir', default=website_dir)
|
||||
arg_parser.add_argument('--website-dir', default=website_dir)
|
||||
arg_parser.add_argument('--src-dir', default=src_dir)
|
||||
arg_parser.add_argument('--blog-dir', default=os.path.join(website_dir, 'blog'))
|
||||
arg_parser.add_argument('--output-dir', default='build')
|
||||
arg_parser.add_argument('--nav-limit', type=int, default='0')
|
||||
arg_parser.add_argument('--skip-multi-page', action='store_true')
|
||||
arg_parser.add_argument('--skip-single-page', action='store_true')
|
||||
arg_parser.add_argument('--skip-amp', action='store_true')
|
||||
arg_parser.add_argument('--skip-website', action='store_true')
|
||||
arg_parser.add_argument('--skip-blog', action='store_true')
|
||||
arg_parser.add_argument('--skip-git-log', action='store_true')
|
||||
arg_parser.add_argument('--skip-docs', action='store_true')
|
||||
arg_parser.add_argument('--test-only', action='store_true')
|
||||
arg_parser.add_argument('--minify', action='store_true')
|
||||
arg_parser.add_argument('--htmlproofer', action='store_true')
|
||||
arg_parser.add_argument('--no-docs-macros', action='store_true')
|
||||
arg_parser.add_argument('--save-raw-single-page', type=str)
|
||||
arg_parser.add_argument('--livereload', type=int, default='0')
|
||||
arg_parser.add_argument('--verbose', action='store_true')
|
||||
arg_parser.add_argument("--lang", default="en,ru,zh,ja")
|
||||
arg_parser.add_argument("--blog-lang", default="en")
|
||||
arg_parser.add_argument("--docs-dir", default=".")
|
||||
arg_parser.add_argument("--theme-dir", default=website_dir)
|
||||
arg_parser.add_argument("--website-dir", default=website_dir)
|
||||
arg_parser.add_argument("--src-dir", default=src_dir)
|
||||
arg_parser.add_argument("--blog-dir", default=os.path.join(website_dir, "blog"))
|
||||
arg_parser.add_argument("--output-dir", default="build")
|
||||
arg_parser.add_argument("--nav-limit", type=int, default="0")
|
||||
arg_parser.add_argument("--skip-multi-page", action="store_true")
|
||||
arg_parser.add_argument("--skip-single-page", action="store_true")
|
||||
arg_parser.add_argument("--skip-amp", action="store_true")
|
||||
arg_parser.add_argument("--skip-website", action="store_true")
|
||||
arg_parser.add_argument("--skip-blog", action="store_true")
|
||||
arg_parser.add_argument("--skip-git-log", action="store_true")
|
||||
arg_parser.add_argument("--skip-docs", action="store_true")
|
||||
arg_parser.add_argument("--test-only", action="store_true")
|
||||
arg_parser.add_argument("--minify", action="store_true")
|
||||
arg_parser.add_argument("--htmlproofer", action="store_true")
|
||||
arg_parser.add_argument("--no-docs-macros", action="store_true")
|
||||
arg_parser.add_argument("--save-raw-single-page", type=str)
|
||||
arg_parser.add_argument("--livereload", type=int, default="0")
|
||||
arg_parser.add_argument("--verbose", action="store_true")
|
||||
|
||||
args = arg_parser.parse_args()
|
||||
args.minify = False # TODO remove
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.DEBUG if args.verbose else logging.INFO,
|
||||
stream=sys.stderr
|
||||
level=logging.DEBUG if args.verbose else logging.INFO, stream=sys.stderr
|
||||
)
|
||||
|
||||
logging.getLogger('MARKDOWN').setLevel(logging.INFO)
|
||||
logging.getLogger("MARKDOWN").setLevel(logging.INFO)
|
||||
|
||||
args.docs_output_dir = os.path.join(os.path.abspath(args.output_dir), 'docs')
|
||||
args.blog_output_dir = os.path.join(os.path.abspath(args.output_dir), 'blog')
|
||||
args.docs_output_dir = os.path.join(os.path.abspath(args.output_dir), "docs")
|
||||
args.blog_output_dir = os.path.join(os.path.abspath(args.output_dir), "blog")
|
||||
|
||||
from github import get_events
|
||||
args.rev = subprocess.check_output('git rev-parse HEAD', shell=True).decode('utf-8').strip()
|
||||
args.rev_short = subprocess.check_output('git rev-parse --short HEAD', shell=True).decode('utf-8').strip()
|
||||
args.rev_url = f'https://github.com/ClickHouse/ClickHouse/commit/{args.rev}'
|
||||
|
||||
args.rev = (
|
||||
subprocess.check_output("git rev-parse HEAD", shell=True)
|
||||
.decode("utf-8")
|
||||
.strip()
|
||||
)
|
||||
args.rev_short = (
|
||||
subprocess.check_output("git rev-parse --short HEAD", shell=True)
|
||||
.decode("utf-8")
|
||||
.strip()
|
||||
)
|
||||
args.rev_url = f"https://github.com/ClickHouse/ClickHouse/commit/{args.rev}"
|
||||
args.events = get_events(args)
|
||||
|
||||
if args.test_only:
|
||||
@ -233,18 +248,20 @@ if __name__ == '__main__':
|
||||
mdx_clickhouse.PatchedMacrosPlugin.skip_git_log = True
|
||||
|
||||
from build import build
|
||||
|
||||
build(args)
|
||||
|
||||
if args.livereload:
|
||||
new_args = [arg for arg in sys.argv if not arg.startswith('--livereload')]
|
||||
new_args = sys.executable + ' ' + ' '.join(new_args)
|
||||
new_args = [arg for arg in sys.argv if not arg.startswith("--livereload")]
|
||||
new_args = sys.executable + " " + " ".join(new_args)
|
||||
|
||||
server = livereload.Server()
|
||||
server.watch(args.docs_dir + '**/*', livereload.shell(new_args, cwd='tools', shell=True))
|
||||
server.watch(args.website_dir + '**/*', livereload.shell(new_args, cwd='tools', shell=True))
|
||||
server.serve(
|
||||
root=args.output_dir,
|
||||
host='0.0.0.0',
|
||||
port=args.livereload
|
||||
server.watch(
|
||||
args.docs_dir + "**/*", livereload.shell(new_args, cwd="tools", shell=True)
|
||||
)
|
||||
server.watch(
|
||||
args.website_dir + "**/*",
|
||||
livereload.shell(new_args, cwd="tools", shell=True),
|
||||
)
|
||||
server.serve(root=args.output_dir, host="0.0.0.0", port=args.livereload)
|
||||
sys.exit(0)
|
||||
|
@ -6,11 +6,13 @@ from typing import TextIO, List, Tuple, Optional, Dict
|
||||
Entity = Tuple[str, str, str]
|
||||
|
||||
# https://regex101.com/r/R6iogw/12
|
||||
cmake_option_regex: str = r"^\s*option\s*\(([A-Z_0-9${}]+)\s*(?:\"((?:.|\n)*?)\")?\s*(.*)?\).*$"
|
||||
cmake_option_regex: str = (
|
||||
r"^\s*option\s*\(([A-Z_0-9${}]+)\s*(?:\"((?:.|\n)*?)\")?\s*(.*)?\).*$"
|
||||
)
|
||||
|
||||
ch_master_url: str = "https://github.com/clickhouse/clickhouse/blob/master/"
|
||||
|
||||
name_str: str = "<a name=\"{anchor}\"></a>[`{name}`](" + ch_master_url + "{path}#L{line})"
|
||||
name_str: str = '<a name="{anchor}"></a>[`{name}`](' + ch_master_url + "{path}#L{line})"
|
||||
default_anchor_str: str = "[`{name}`](#{anchor})"
|
||||
|
||||
comment_var_regex: str = r"\${(.+)}"
|
||||
@ -27,11 +29,15 @@ entities: Dict[str, Tuple[str, str]] = {}
|
||||
|
||||
|
||||
def make_anchor(t: str) -> str:
|
||||
return "".join(["-" if i == "_" else i.lower() for i in t if i.isalpha() or i == "_"])
|
||||
return "".join(
|
||||
["-" if i == "_" else i.lower() for i in t if i.isalpha() or i == "_"]
|
||||
)
|
||||
|
||||
|
||||
def process_comment(comment: str) -> str:
|
||||
return re.sub(comment_var_regex, comment_var_replace, comment, flags=re.MULTILINE)
|
||||
|
||||
|
||||
def build_entity(path: str, entity: Entity, line_comment: Tuple[int, str]) -> None:
|
||||
(line, comment) = line_comment
|
||||
(name, description, default) = entity
|
||||
@ -47,22 +53,22 @@ def build_entity(path: str, entity: Entity, line_comment: Tuple[int, str]) -> No
|
||||
formatted_default: str = "`" + default + "`"
|
||||
|
||||
formatted_name: str = name_str.format(
|
||||
anchor=make_anchor(name),
|
||||
name=name,
|
||||
path=path,
|
||||
line=line)
|
||||
anchor=make_anchor(name), name=name, path=path, line=line
|
||||
)
|
||||
|
||||
formatted_description: str = "".join(description.split("\n"))
|
||||
|
||||
formatted_comment: str = process_comment(comment)
|
||||
|
||||
formatted_entity: str = "| {} | {} | {} | {} |".format(
|
||||
formatted_name, formatted_default, formatted_description, formatted_comment)
|
||||
formatted_name, formatted_default, formatted_description, formatted_comment
|
||||
)
|
||||
|
||||
entities[name] = path, formatted_entity
|
||||
|
||||
|
||||
def process_file(root_path: str, file_path: str, file_name: str) -> None:
|
||||
with open(os.path.join(file_path, file_name), 'r') as cmake_file:
|
||||
with open(os.path.join(file_path, file_name), "r") as cmake_file:
|
||||
contents: str = cmake_file.read()
|
||||
|
||||
def get_line_and_comment(target: str) -> Tuple[int, str]:
|
||||
@ -70,10 +76,10 @@ def process_file(root_path: str, file_path: str, file_name: str) -> None:
|
||||
comment: str = ""
|
||||
|
||||
for n, line in enumerate(contents_list):
|
||||
if 'option' not in line.lower() or target not in line:
|
||||
if "option" not in line.lower() or target not in line:
|
||||
continue
|
||||
|
||||
for maybe_comment_line in contents_list[n - 1::-1]:
|
||||
for maybe_comment_line in contents_list[n - 1 :: -1]:
|
||||
if not re.match("\s*#\s*", maybe_comment_line):
|
||||
break
|
||||
|
||||
@ -82,16 +88,22 @@ def process_file(root_path: str, file_path: str, file_name: str) -> None:
|
||||
# line numbering starts with 1
|
||||
return n + 1, comment
|
||||
|
||||
matches: Optional[List[Entity]] = re.findall(cmake_option_regex, contents, re.MULTILINE)
|
||||
matches: Optional[List[Entity]] = re.findall(
|
||||
cmake_option_regex, contents, re.MULTILINE
|
||||
)
|
||||
|
||||
|
||||
file_rel_path_with_name: str = os.path.join(file_path[len(root_path):], file_name)
|
||||
if file_rel_path_with_name.startswith('/'):
|
||||
file_rel_path_with_name: str = os.path.join(
|
||||
file_path[len(root_path) :], file_name
|
||||
)
|
||||
if file_rel_path_with_name.startswith("/"):
|
||||
file_rel_path_with_name = file_rel_path_with_name[1:]
|
||||
|
||||
if matches:
|
||||
for entity in matches:
|
||||
build_entity(file_rel_path_with_name, entity, get_line_and_comment(entity[0]))
|
||||
build_entity(
|
||||
file_rel_path_with_name, entity, get_line_and_comment(entity[0])
|
||||
)
|
||||
|
||||
|
||||
def process_folder(root_path: str, name: str) -> None:
|
||||
for root, _, files in os.walk(os.path.join(root_path, name)):
|
||||
@ -99,12 +111,19 @@ def process_folder(root_path: str, name: str) -> None:
|
||||
if f == "CMakeLists.txt" or ".cmake" in f:
|
||||
process_file(root_path, root, f)
|
||||
|
||||
def generate_cmake_flags_files() -> None:
|
||||
root_path: str = os.path.join(os.path.dirname(__file__), '..', '..')
|
||||
|
||||
output_file_name: str = os.path.join(root_path, "docs/en/development/cmake-in-clickhouse.md")
|
||||
header_file_name: str = os.path.join(root_path, "docs/_includes/cmake_in_clickhouse_header.md")
|
||||
footer_file_name: str = os.path.join(root_path, "docs/_includes/cmake_in_clickhouse_footer.md")
|
||||
def generate_cmake_flags_files() -> None:
|
||||
root_path: str = os.path.join(os.path.dirname(__file__), "..", "..")
|
||||
|
||||
output_file_name: str = os.path.join(
|
||||
root_path, "docs/en/development/cmake-in-clickhouse.md"
|
||||
)
|
||||
header_file_name: str = os.path.join(
|
||||
root_path, "docs/_includes/cmake_in_clickhouse_header.md"
|
||||
)
|
||||
footer_file_name: str = os.path.join(
|
||||
root_path, "docs/_includes/cmake_in_clickhouse_footer.md"
|
||||
)
|
||||
|
||||
process_file(root_path, root_path, "CMakeLists.txt")
|
||||
process_file(root_path, os.path.join(root_path, "programs"), "CMakeLists.txt")
|
||||
@ -127,8 +146,10 @@ def generate_cmake_flags_files() -> None:
|
||||
f.write(entities[k][1] + "\n")
|
||||
ignored_keys.append(k)
|
||||
|
||||
f.write("\n### External libraries\nNote that ClickHouse uses forks of these libraries, see https://github.com/ClickHouse-Extras.\n" +
|
||||
table_header)
|
||||
f.write(
|
||||
"\n### External libraries\nNote that ClickHouse uses forks of these libraries, see https://github.com/ClickHouse-Extras.\n"
|
||||
+ table_header
|
||||
)
|
||||
|
||||
for k in sorted_keys:
|
||||
if k.startswith("ENABLE_") and ".cmake" in entities[k][0]:
|
||||
@ -143,15 +164,18 @@ def generate_cmake_flags_files() -> None:
|
||||
with open(footer_file_name, "r") as footer:
|
||||
f.write(footer.read())
|
||||
|
||||
other_languages = ["docs/ja/development/cmake-in-clickhouse.md",
|
||||
"docs/zh/development/cmake-in-clickhouse.md",
|
||||
"docs/ru/development/cmake-in-clickhouse.md"]
|
||||
other_languages = [
|
||||
"docs/ja/development/cmake-in-clickhouse.md",
|
||||
"docs/zh/development/cmake-in-clickhouse.md",
|
||||
"docs/ru/development/cmake-in-clickhouse.md",
|
||||
]
|
||||
|
||||
for lang in other_languages:
|
||||
other_file_name = os.path.join(root_path, lang)
|
||||
if os.path.exists(other_file_name):
|
||||
os.unlink(other_file_name)
|
||||
os.unlink(other_file_name)
|
||||
os.symlink(output_file_name, other_file_name)
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
if __name__ == "__main__":
|
||||
generate_cmake_flags_files()
|
||||
|
@ -8,7 +8,7 @@ import contextlib
|
||||
from git import cmd
|
||||
from tempfile import NamedTemporaryFile
|
||||
|
||||
SCRIPT_DESCRIPTION = '''
|
||||
SCRIPT_DESCRIPTION = """
|
||||
usage: ./easy_diff.py language/document path
|
||||
|
||||
Show the difference between a language document and an English document.
|
||||
@ -53,16 +53,16 @@ SCRIPT_DESCRIPTION = '''
|
||||
OPTIONS:
|
||||
-h, --help show this help message and exit
|
||||
--no-pager use stdout as difference result output
|
||||
'''
|
||||
"""
|
||||
|
||||
SCRIPT_PATH = os.path.abspath(__file__)
|
||||
CLICKHOUSE_REPO_HOME = os.path.join(os.path.dirname(SCRIPT_PATH), '..', '..')
|
||||
CLICKHOUSE_REPO_HOME = os.path.join(os.path.dirname(SCRIPT_PATH), "..", "..")
|
||||
SCRIPT_COMMAND_EXECUTOR = cmd.Git(CLICKHOUSE_REPO_HOME)
|
||||
|
||||
SCRIPT_COMMAND_PARSER = argparse.ArgumentParser(add_help=False)
|
||||
SCRIPT_COMMAND_PARSER.add_argument('path', type=bytes, nargs='?', default=None)
|
||||
SCRIPT_COMMAND_PARSER.add_argument('--no-pager', action='store_true', default=False)
|
||||
SCRIPT_COMMAND_PARSER.add_argument('-h', '--help', action='store_true', default=False)
|
||||
SCRIPT_COMMAND_PARSER.add_argument("path", type=bytes, nargs="?", default=None)
|
||||
SCRIPT_COMMAND_PARSER.add_argument("--no-pager", action="store_true", default=False)
|
||||
SCRIPT_COMMAND_PARSER.add_argument("-h", "--help", action="store_true", default=False)
|
||||
|
||||
|
||||
def execute(commands):
|
||||
@ -70,19 +70,41 @@ def execute(commands):
|
||||
|
||||
|
||||
def get_hash(file_name):
|
||||
return execute(['git', 'log', '-n', '1', '--pretty=format:"%H"', file_name])
|
||||
return execute(["git", "log", "-n", "1", '--pretty=format:"%H"', file_name])
|
||||
|
||||
|
||||
def diff_file(reference_file, working_file, out):
|
||||
if not os.path.exists(reference_file):
|
||||
raise RuntimeError('reference file [' + os.path.abspath(reference_file) + '] is not exists.')
|
||||
raise RuntimeError(
|
||||
"reference file [" + os.path.abspath(reference_file) + "] is not exists."
|
||||
)
|
||||
|
||||
if os.path.islink(working_file):
|
||||
out.writelines(["Need translate document:" + os.path.abspath(reference_file)])
|
||||
elif not os.path.exists(working_file):
|
||||
out.writelines(['Need link document ' + os.path.abspath(reference_file) + ' to ' + os.path.abspath(working_file)])
|
||||
out.writelines(
|
||||
[
|
||||
"Need link document "
|
||||
+ os.path.abspath(reference_file)
|
||||
+ " to "
|
||||
+ os.path.abspath(working_file)
|
||||
]
|
||||
)
|
||||
elif get_hash(working_file) != get_hash(reference_file):
|
||||
out.writelines([(execute(['git', 'diff', get_hash(working_file).strip('"'), reference_file]).encode('utf-8'))])
|
||||
out.writelines(
|
||||
[
|
||||
(
|
||||
execute(
|
||||
[
|
||||
"git",
|
||||
"diff",
|
||||
get_hash(working_file).strip('"'),
|
||||
reference_file,
|
||||
]
|
||||
).encode("utf-8")
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
return 0
|
||||
|
||||
@ -94,20 +116,30 @@ def diff_directory(reference_directory, working_directory, out):
|
||||
for list_item in os.listdir(reference_directory):
|
||||
working_item = os.path.join(working_directory, list_item)
|
||||
reference_item = os.path.join(reference_directory, list_item)
|
||||
if diff_file(reference_item, working_item, out) if os.path.isfile(reference_item) else diff_directory(reference_item, working_item, out) != 0:
|
||||
if (
|
||||
diff_file(reference_item, working_item, out)
|
||||
if os.path.isfile(reference_item)
|
||||
else diff_directory(reference_item, working_item, out) != 0
|
||||
):
|
||||
return 1
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
-def find_language_doc(custom_document, other_language='en', children=[]):
+def find_language_doc(custom_document, other_language="en", children=[]):
     if len(custom_document) == 0:
-        raise RuntimeError('The ' + os.path.join(custom_document, *children) + " is not in docs directory.")
+        raise RuntimeError(
+            "The "
+            + os.path.join(custom_document, *children)
+            + " is not in docs directory."
+        )

-    if os.path.samefile(os.path.join(CLICKHOUSE_REPO_HOME, 'docs'), custom_document):
-        return os.path.join(CLICKHOUSE_REPO_HOME, 'docs', other_language, *children[1:])
+    if os.path.samefile(os.path.join(CLICKHOUSE_REPO_HOME, "docs"), custom_document):
+        return os.path.join(CLICKHOUSE_REPO_HOME, "docs", other_language, *children[1:])
     children.insert(0, os.path.split(custom_document)[1])
-    return find_language_doc(os.path.split(custom_document)[0], other_language, children)
+    return find_language_doc(
+        os.path.split(custom_document)[0], other_language, children
+    )

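The recursion above climbs from a translated document up to the docs root, collecting path components, and rebuilds the same relative path under docs/en. A simplified, self-contained sketch of that mapping (the paths are invented for illustration):

import os

# Pure-path approximation of find_language_doc: docs/<lang>/<rest> -> docs/en/<rest>.
def to_other_language(path_under_docs, other_language="en"):
    parts = path_under_docs.split(os.sep)  # e.g. ["zh", "development", "tests.md"]
    return os.path.join("docs", other_language, *parts[1:])

assert to_other_language(os.path.join("zh", "development", "tests.md")) == os.path.join(
    "docs", "en", "development", "tests.md"
)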
class ToPager:
|
||||
@ -119,7 +151,7 @@ class ToPager:
|
||||
|
||||
def close(self):
|
||||
self.temp_named_file.flush()
|
||||
git_pager = execute(['git', 'var', 'GIT_PAGER'])
|
||||
git_pager = execute(["git", "var", "GIT_PAGER"])
|
||||
subprocess.check_call([git_pager, self.temp_named_file.name])
|
||||
self.temp_named_file.close()
|
||||
|
||||
@ -135,12 +167,20 @@ class ToStdOut:
|
||||
self.system_stdout_stream = system_stdout_stream
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
arguments = SCRIPT_COMMAND_PARSER.parse_args()
|
||||
if arguments.help or not arguments.path:
|
||||
sys.stdout.write(SCRIPT_DESCRIPTION)
|
||||
sys.exit(0)
|
||||
|
||||
working_language = os.path.join(CLICKHOUSE_REPO_HOME, 'docs', arguments.path)
|
||||
with contextlib.closing(ToStdOut(sys.stdout) if arguments.no_pager else ToPager(NamedTemporaryFile('r+'))) as writer:
|
||||
exit(diff_directory(find_language_doc(working_language), working_language, writer))
|
||||
working_language = os.path.join(CLICKHOUSE_REPO_HOME, "docs", arguments.path)
|
||||
with contextlib.closing(
|
||||
ToStdOut(sys.stdout)
|
||||
if arguments.no_pager
|
||||
else ToPager(NamedTemporaryFile("r+"))
|
||||
) as writer:
|
||||
exit(
|
||||
diff_directory(
|
||||
find_language_doc(working_language), working_language, writer
|
||||
)
|
||||
)
|
||||
|
@ -16,27 +16,26 @@ import util
|
||||
def get_events(args):
|
||||
events = []
|
||||
skip = True
|
||||
with open(os.path.join(args.docs_dir, '..', 'README.md')) as f:
|
||||
with open(os.path.join(args.docs_dir, "..", "README.md")) as f:
|
||||
for line in f:
|
||||
if skip:
|
||||
if 'Upcoming Events' in line:
|
||||
if "Upcoming Events" in line:
|
||||
skip = False
|
||||
else:
|
||||
if not line:
|
||||
continue
|
||||
line = line.strip().split('](')
|
||||
line = line.strip().split("](")
|
||||
if len(line) == 2:
|
||||
tail = line[1].split(') ')
|
||||
events.append({
|
||||
'signup_link': tail[0],
|
||||
'event_name': line[0].replace('* [', ''),
|
||||
'event_date': tail[1].replace('on ', '').replace('.', '')
|
||||
})
|
||||
tail = line[1].split(") ")
|
||||
events.append(
|
||||
{
|
||||
"signup_link": tail[0],
|
||||
"event_name": line[0].replace("* [", ""),
|
||||
"event_date": tail[1].replace("on ", "").replace(".", ""),
|
||||
}
|
||||
)
|
||||
return events
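The parser above relies on a fixed "* [Name](link) on Date." layout in the README; a standalone walk-through of one made-up line (not taken from the real README) shows what ends up in each field:

line = "* [ClickHouse Meetup](https://example.com/signup) on January 1, 2023."
name_part, tail = line.strip().split("](")
signup_link, date_part = tail.split(") ")
event = {
    "signup_link": signup_link,
    "event_name": name_part.replace("* [", ""),
    "event_date": date_part.replace("on ", "").replace(".", ""),
}
assert event["event_name"] == "ClickHouse Meetup"
assert event["event_date"] == "January 1, 2023"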

-if __name__ == '__main__':
-    logging.basicConfig(
-        level=logging.DEBUG,
-        stream=sys.stderr
-    )
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.DEBUG, stream=sys.stderr)

@@ -16,74 +16,79 @@ import slugify as slugify_impl
|
||||
|
||||
|
||||
 def slugify(value, separator):
-    return slugify_impl.slugify(value, separator=separator, word_boundary=True, save_order=True)
+    return slugify_impl.slugify(
+        value, separator=separator, word_boundary=True, save_order=True
+    )
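This wrapper feeds the toc permalink slugifier configured below. A tiny usage sketch, assuming the python-slugify package (which provides these keyword arguments); the header text and expected output are illustrative only:

import slugify as slugify_impl

def slugify(value, separator):
    return slugify_impl.slugify(
        value, separator=separator, word_boundary=True, save_order=True
    )

print(slugify("Upcoming Events 2022", "-"))  # expected: "upcoming-events-2022"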
|
||||
|
||||
MARKDOWN_EXTENSIONS = [
|
||||
'mdx_clickhouse',
|
||||
'admonition',
|
||||
'attr_list',
|
||||
'def_list',
|
||||
'codehilite',
|
||||
'nl2br',
|
||||
'sane_lists',
|
||||
'pymdownx.details',
|
||||
'pymdownx.magiclink',
|
||||
'pymdownx.superfences',
|
||||
'extra',
|
||||
{
|
||||
'toc': {
|
||||
'permalink': True,
|
||||
'slugify': slugify
|
||||
}
|
||||
}
|
||||
"mdx_clickhouse",
|
||||
"admonition",
|
||||
"attr_list",
|
||||
"def_list",
|
||||
"codehilite",
|
||||
"nl2br",
|
||||
"sane_lists",
|
||||
"pymdownx.details",
|
||||
"pymdownx.magiclink",
|
||||
"pymdownx.superfences",
|
||||
"extra",
|
||||
{"toc": {"permalink": True, "slugify": slugify}},
|
||||
]
|
||||
|
||||
|
||||
class ClickHouseLinkMixin(object):
|
||||
|
||||
def handleMatch(self, m, data):
|
||||
single_page = (os.environ.get('SINGLE_PAGE') == '1')
|
||||
single_page = os.environ.get("SINGLE_PAGE") == "1"
|
||||
try:
|
||||
el, start, end = super(ClickHouseLinkMixin, self).handleMatch(m, data)
|
||||
except IndexError:
|
||||
return
|
||||
|
||||
if el is not None:
|
||||
href = el.get('href') or ''
|
||||
is_external = href.startswith('http:') or href.startswith('https:')
|
||||
href = el.get("href") or ""
|
||||
is_external = href.startswith("http:") or href.startswith("https:")
|
||||
if is_external:
|
||||
if not href.startswith('https://clickhouse.com'):
|
||||
el.set('rel', 'external nofollow noreferrer')
|
||||
if not href.startswith("https://clickhouse.com"):
|
||||
el.set("rel", "external nofollow noreferrer")
|
||||
elif single_page:
|
||||
if '#' in href:
|
||||
el.set('href', '#' + href.split('#', 1)[1])
|
||||
if "#" in href:
|
||||
el.set("href", "#" + href.split("#", 1)[1])
|
||||
else:
|
||||
el.set('href', '#' + href.replace('/index.md', '/').replace('.md', '/'))
|
||||
el.set(
|
||||
"href", "#" + href.replace("/index.md", "/").replace(".md", "/")
|
||||
)
|
||||
return el, start, end
|
||||
|
||||
|
||||
class ClickHouseAutolinkPattern(ClickHouseLinkMixin, markdown.inlinepatterns.AutolinkInlineProcessor):
|
||||
class ClickHouseAutolinkPattern(
|
||||
ClickHouseLinkMixin, markdown.inlinepatterns.AutolinkInlineProcessor
|
||||
):
|
||||
pass
|
||||
|
||||
|
||||
class ClickHouseLinkPattern(ClickHouseLinkMixin, markdown.inlinepatterns.LinkInlineProcessor):
|
||||
class ClickHouseLinkPattern(
|
||||
ClickHouseLinkMixin, markdown.inlinepatterns.LinkInlineProcessor
|
||||
):
|
||||
pass
|
||||
|
||||
|
||||
class ClickHousePreprocessor(markdown.util.Processor):
|
||||
def run(self, lines):
|
||||
for line in lines:
|
||||
if '<!--hide-->' not in line:
|
||||
if "<!--hide-->" not in line:
|
||||
yield line
|
||||
|
||||
|
||||
class ClickHouseMarkdown(markdown.extensions.Extension):
|
||||
|
||||
def extendMarkdown(self, md, md_globals):
|
||||
md.preprocessors['clickhouse'] = ClickHousePreprocessor()
|
||||
md.inlinePatterns['link'] = ClickHouseLinkPattern(markdown.inlinepatterns.LINK_RE, md)
|
||||
md.inlinePatterns['autolink'] = ClickHouseAutolinkPattern(markdown.inlinepatterns.AUTOLINK_RE, md)
|
||||
md.preprocessors["clickhouse"] = ClickHousePreprocessor()
|
||||
md.inlinePatterns["link"] = ClickHouseLinkPattern(
|
||||
markdown.inlinepatterns.LINK_RE, md
|
||||
)
|
||||
md.inlinePatterns["autolink"] = ClickHouseAutolinkPattern(
|
||||
markdown.inlinepatterns.AUTOLINK_RE, md
|
||||
)
|
||||
|
||||
|
||||
def makeExtension(**kwargs):
|
||||
@ -92,10 +97,8 @@ def makeExtension(**kwargs):
|
||||
|
||||
def get_translations(dirname, lang):
|
||||
import babel.support
|
||||
return babel.support.Translations.load(
|
||||
dirname=dirname,
|
||||
locales=[lang, 'en']
|
||||
)
|
||||
|
||||
return babel.support.Translations.load(dirname=dirname, locales=[lang, "en"])
|
||||
|
||||
|
||||
class PatchedMacrosPlugin(macros.plugin.MacrosPlugin):
|
||||
@ -104,22 +107,22 @@ class PatchedMacrosPlugin(macros.plugin.MacrosPlugin):
|
||||
|
||||
def on_config(self, config):
|
||||
super(PatchedMacrosPlugin, self).on_config(config)
|
||||
self.env.comment_start_string = '{##'
|
||||
self.env.comment_end_string = '##}'
|
||||
self.env.loader = jinja2.FileSystemLoader([
|
||||
os.path.join(config.data['site_dir']),
|
||||
os.path.join(config.data['extra']['includes_dir'])
|
||||
])
|
||||
self.env.comment_start_string = "{##"
|
||||
self.env.comment_end_string = "##}"
|
||||
self.env.loader = jinja2.FileSystemLoader(
|
||||
[
|
||||
os.path.join(config.data["site_dir"]),
|
||||
os.path.join(config.data["extra"]["includes_dir"]),
|
||||
]
|
||||
)
|
||||
|
||||
def on_env(self, env, config, files):
|
||||
import util
|
||||
env.add_extension('jinja2.ext.i18n')
|
||||
dirname = os.path.join(config.data['theme'].dirs[0], 'locale')
|
||||
lang = config.data['theme']['language']
|
||||
env.install_gettext_translations(
|
||||
get_translations(dirname, lang),
|
||||
newstyle=True
|
||||
)
|
||||
|
||||
env.add_extension("jinja2.ext.i18n")
|
||||
dirname = os.path.join(config.data["theme"].dirs[0], "locale")
|
||||
lang = config.data["theme"]["language"]
|
||||
env.install_gettext_translations(get_translations(dirname, lang), newstyle=True)
|
||||
util.init_jinja2_filters(env)
|
||||
return env
|
||||
|
||||
@ -130,13 +133,17 @@ class PatchedMacrosPlugin(macros.plugin.MacrosPlugin):
|
||||
return markdown
|
||||
|
||||
def on_page_markdown(self, markdown, page, config, files):
|
||||
markdown = super(PatchedMacrosPlugin, self).on_page_markdown(markdown, page, config, files)
|
||||
markdown = super(PatchedMacrosPlugin, self).on_page_markdown(
|
||||
markdown, page, config, files
|
||||
)
|
||||
|
||||
if os.path.islink(page.file.abs_src_path):
|
||||
lang = config.data['theme']['language']
|
||||
page.canonical_url = page.canonical_url.replace(f'/{lang}/', '/en/', 1)
|
||||
lang = config.data["theme"]["language"]
|
||||
page.canonical_url = page.canonical_url.replace(f"/{lang}/", "/en/", 1)
|
||||
|
||||
if config.data['extra'].get('version_prefix') or config.data['extra'].get('single_page'):
|
||||
if config.data["extra"].get("version_prefix") or config.data["extra"].get(
|
||||
"single_page"
|
||||
):
|
||||
return markdown
|
||||
if self.skip_git_log:
|
||||
return markdown
|
||||
|
@ -10,57 +10,59 @@ import util
|
||||
|
||||
|
||||
 def find_first_header(content):
-    for line in content.split('\n'):
-        if line.startswith('#'):
-            no_hash = line.lstrip('#')
-            return no_hash.split('{', 1)[0].strip()
+    for line in content.split("\n"):
+        if line.startswith("#"):
+            no_hash = line.lstrip("#")
+            return no_hash.split("{", 1)[0].strip()

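A quick standalone check of what this helper returns for a typical docs page (the markdown snippet is made up): the first "#" heading, with any trailing "{#anchor}" attribute dropped.

def find_first_header(content):
    for line in content.split("\n"):
        if line.startswith("#"):
            no_hash = line.lstrip("#")
            return no_hash.split("{", 1)[0].strip()

sample = "# System Tables {#system-tables}\n\nIntro text.\n"
assert find_first_header(sample) == "System Tables"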
def build_nav_entry(root, args):
|
||||
if root.endswith('images'):
|
||||
if root.endswith("images"):
|
||||
return None, None, None
|
||||
result_items = []
|
||||
index_meta, index_content = util.read_md_file(os.path.join(root, 'index.md'))
|
||||
current_title = index_meta.get('toc_folder_title', index_meta.get('toc_title'))
|
||||
current_title = current_title or index_meta.get('title', find_first_header(index_content))
|
||||
index_meta, index_content = util.read_md_file(os.path.join(root, "index.md"))
|
||||
current_title = index_meta.get("toc_folder_title", index_meta.get("toc_title"))
|
||||
current_title = current_title or index_meta.get(
|
||||
"title", find_first_header(index_content)
|
||||
)
|
||||
for filename in os.listdir(root):
|
||||
path = os.path.join(root, filename)
|
||||
if os.path.isdir(path):
|
||||
prio, title, payload = build_nav_entry(path, args)
|
||||
if title and payload:
|
||||
result_items.append((prio, title, payload))
|
||||
elif filename.endswith('.md'):
|
||||
elif filename.endswith(".md"):
|
||||
path = os.path.join(root, filename)
|
||||
|
||||
meta = ''
|
||||
content = ''
|
||||
meta = ""
|
||||
content = ""
|
||||
|
||||
try:
|
||||
meta, content = util.read_md_file(path)
|
||||
except:
|
||||
print('Error in file: {}'.format(path))
|
||||
print("Error in file: {}".format(path))
|
||||
raise
|
||||
|
||||
path = path.split('/', 2)[-1]
|
||||
title = meta.get('toc_title', find_first_header(content))
|
||||
path = path.split("/", 2)[-1]
|
||||
title = meta.get("toc_title", find_first_header(content))
|
||||
if title:
|
||||
title = title.strip().rstrip('.')
|
||||
title = title.strip().rstrip(".")
|
||||
else:
|
||||
title = meta.get('toc_folder_title', 'hidden')
|
||||
prio = meta.get('toc_priority', 9999)
|
||||
logging.debug(f'Nav entry: {prio}, {title}, {path}')
|
||||
if meta.get('toc_hidden') or not content.strip():
|
||||
title = 'hidden'
|
||||
if title == 'hidden':
|
||||
title = 'hidden-' + hashlib.sha1(content.encode('utf-8')).hexdigest()
|
||||
title = meta.get("toc_folder_title", "hidden")
|
||||
prio = meta.get("toc_priority", 9999)
|
||||
logging.debug(f"Nav entry: {prio}, {title}, {path}")
|
||||
if meta.get("toc_hidden") or not content.strip():
|
||||
title = "hidden"
|
||||
if title == "hidden":
|
||||
title = "hidden-" + hashlib.sha1(content.encode("utf-8")).hexdigest()
|
||||
if args.nav_limit and len(result_items) >= args.nav_limit:
|
||||
break
|
||||
result_items.append((prio, title, path))
|
||||
result_items = sorted(result_items, key=lambda x: (x[0], x[1]))
|
||||
result = collections.OrderedDict([(item[1], item[2]) for item in result_items])
|
||||
if index_meta.get('toc_hidden_folder'):
|
||||
current_title += '|hidden-folder'
|
||||
return index_meta.get('toc_priority', 10000), current_title, result
|
||||
if index_meta.get("toc_hidden_folder"):
|
||||
current_title += "|hidden-folder"
|
||||
return index_meta.get("toc_priority", 10000), current_title, result
|
||||
|
||||
|
||||
def build_docs_nav(lang, args):
|
||||
@ -70,7 +72,7 @@ def build_docs_nav(lang, args):
|
||||
index_key = None
|
||||
for key, value in list(nav.items()):
|
||||
if key and value:
|
||||
if value == 'index.md':
|
||||
if value == "index.md":
|
||||
index_key = key
|
||||
continue
|
||||
result.append({key: value})
|
||||
@ -78,7 +80,7 @@ def build_docs_nav(lang, args):
|
||||
break
|
||||
if index_key:
|
||||
key = list(result[0].keys())[0]
|
||||
result[0][key][index_key] = 'index.md'
|
||||
result[0][key][index_key] = "index.md"
|
||||
result[0][key].move_to_end(index_key, last=False)
|
||||
return result
|
||||
|
||||
@ -86,7 +88,7 @@ def build_docs_nav(lang, args):
|
||||
def build_blog_nav(lang, args):
|
||||
blog_dir = os.path.join(args.blog_dir, lang)
|
||||
years = sorted(os.listdir(blog_dir), reverse=True)
|
||||
result_nav = [{'hidden': 'index.md'}]
|
||||
result_nav = [{"hidden": "index.md"}]
|
||||
post_meta = collections.OrderedDict()
|
||||
for year in years:
|
||||
year_dir = os.path.join(blog_dir, year)
|
||||
@ -97,38 +99,53 @@ def build_blog_nav(lang, args):
|
||||
post_meta_items = []
|
||||
for post in os.listdir(year_dir):
|
||||
post_path = os.path.join(year_dir, post)
|
||||
if not post.endswith('.md'):
|
||||
raise RuntimeError(f'Unexpected non-md file in posts folder: {post_path}')
|
||||
if not post.endswith(".md"):
|
||||
raise RuntimeError(
|
||||
f"Unexpected non-md file in posts folder: {post_path}"
|
||||
)
|
||||
meta, _ = util.read_md_file(post_path)
|
||||
post_date = meta['date']
|
||||
post_title = meta['title']
|
||||
post_date = meta["date"]
|
||||
post_title = meta["title"]
|
||||
if datetime.date.fromisoformat(post_date) > datetime.date.today():
|
||||
continue
|
||||
posts.append(
|
||||
(post_date, post_title, os.path.join(year, post),)
|
||||
(
|
||||
post_date,
|
||||
post_title,
|
||||
os.path.join(year, post),
|
||||
)
|
||||
)
|
||||
if post_title in post_meta:
|
||||
raise RuntimeError(f'Duplicate post title: {post_title}')
|
||||
if not post_date.startswith(f'{year}-'):
|
||||
raise RuntimeError(f'Post date {post_date} doesn\'t match the folder year {year}: {post_title}')
|
||||
post_url_part = post.replace('.md', '')
|
||||
post_meta_items.append((post_date, {
|
||||
'date': post_date,
|
||||
'title': post_title,
|
||||
'image': meta.get('image'),
|
||||
'url': f'/blog/{lang}/{year}/{post_url_part}/'
|
||||
},))
|
||||
raise RuntimeError(f"Duplicate post title: {post_title}")
|
||||
if not post_date.startswith(f"{year}-"):
|
||||
raise RuntimeError(
|
||||
f"Post date {post_date} doesn't match the folder year {year}: {post_title}"
|
||||
)
|
||||
post_url_part = post.replace(".md", "")
|
||||
post_meta_items.append(
|
||||
(
|
||||
post_date,
|
||||
{
|
||||
"date": post_date,
|
||||
"title": post_title,
|
||||
"image": meta.get("image"),
|
||||
"url": f"/blog/{lang}/{year}/{post_url_part}/",
|
||||
},
|
||||
)
|
||||
)
|
||||
for _, title, path in sorted(posts, reverse=True):
|
||||
result_nav[-1][year][title] = path
|
||||
for _, post_meta_item in sorted(post_meta_items,
|
||||
reverse=True,
|
||||
key=lambda item: item[0]):
|
||||
post_meta[post_meta_item['title']] = post_meta_item
|
||||
for _, post_meta_item in sorted(
|
||||
post_meta_items, reverse=True, key=lambda item: item[0]
|
||||
):
|
||||
post_meta[post_meta_item["title"]] = post_meta_item
|
||||
return result_nav, post_meta
|
||||
|
||||
|
||||
def _custom_get_navigation(files, config):
|
||||
nav_config = config['nav'] or mkdocs.structure.nav.nest_paths(f.src_path for f in files.documentation_pages())
|
||||
nav_config = config["nav"] or mkdocs.structure.nav.nest_paths(
|
||||
f.src_path for f in files.documentation_pages()
|
||||
)
|
||||
items = mkdocs.structure.nav._data_to_navigation(nav_config, files, config)
|
||||
if not isinstance(items, list):
|
||||
items = [items]
|
||||
@ -138,19 +155,25 @@ def _custom_get_navigation(files, config):
|
||||
mkdocs.structure.nav._add_previous_and_next_links(pages)
|
||||
mkdocs.structure.nav._add_parent_links(items)
|
||||
|
||||
missing_from_config = [file for file in files.documentation_pages() if file.page is None]
|
||||
missing_from_config = [
|
||||
file for file in files.documentation_pages() if file.page is None
|
||||
]
|
||||
if missing_from_config:
|
||||
files._files = [file for file in files._files if file not in missing_from_config]
|
||||
files._files = [
|
||||
file for file in files._files if file not in missing_from_config
|
||||
]
|
||||
|
||||
links = mkdocs.structure.nav._get_by_type(items, mkdocs.structure.nav.Link)
|
||||
for link in links:
|
||||
scheme, netloc, path, params, query, fragment = mkdocs.structure.nav.urlparse(link.url)
|
||||
scheme, netloc, path, params, query, fragment = mkdocs.structure.nav.urlparse(
|
||||
link.url
|
||||
)
|
||||
if scheme or netloc:
|
||||
mkdocs.structure.nav.log.debug(
|
||||
"An external link to '{}' is included in "
|
||||
"the 'nav' configuration.".format(link.url)
|
||||
)
|
||||
elif link.url.startswith('/'):
|
||||
elif link.url.startswith("/"):
|
||||
mkdocs.structure.nav.log.debug(
|
||||
"An absolute path to '{}' is included in the 'nav' configuration, "
|
||||
"which presumably points to an external resource.".format(link.url)
|
||||
|
@ -7,8 +7,9 @@ def write_redirect_html(out_path, to_url):
|
||||
os.makedirs(out_dir)
|
||||
except OSError:
|
||||
pass
|
||||
with open(out_path, 'w') as f:
|
||||
f.write(f'''<!--[if IE 6]> Redirect: {to_url} <![endif]-->
|
||||
with open(out_path, "w") as f:
|
||||
f.write(
|
||||
f"""<!--[if IE 6]> Redirect: {to_url} <![endif]-->
|
||||
<!DOCTYPE HTML>
|
||||
<html lang="en-US">
|
||||
<head>
|
||||
@ -22,18 +23,20 @@ def write_redirect_html(out_path, to_url):
|
||||
<body>
|
||||
If you are not redirected automatically, follow this <a href="{to_url}">link</a>.
|
||||
</body>
|
||||
</html>''')
|
||||
</html>"""
|
||||
)
|
||||
|
||||
|
||||
def build_redirect_html(args, base_prefix, lang, output_dir, from_path, to_path):
|
||||
out_path = os.path.join(
|
||||
output_dir, lang,
|
||||
from_path.replace('/index.md', '/index.html').replace('.md', '/index.html')
|
||||
output_dir,
|
||||
lang,
|
||||
from_path.replace("/index.md", "/index.html").replace(".md", "/index.html"),
|
||||
)
|
||||
-    target_path = to_path.replace('/index.md', '/').replace('.md', '/')
+    target_path = to_path.replace("/index.md", "/").replace(".md", "/")

-    if target_path[0:7] != 'http://' and target_path[0:8] != 'https://':
-        to_url = f'/{base_prefix}/{lang}/{target_path}'
+    if target_path[0:7] != "http://" and target_path[0:8] != "https://":
+        to_url = f"/{base_prefix}/{lang}/{target_path}"
     else:
         to_url = target_path

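To make the string handling above concrete, here is a self-contained sketch with an invented redirect entry, mirroring how a source .md path becomes an output HTML file and a target URL (base prefix "docs", language "en" assumed):

from_path = "introduction/ya_metrika_task.md"   # hypothetical redirects.txt entry
to_path = "introduction/history.md"

out_file = from_path.replace("/index.md", "/index.html").replace(".md", "/index.html")
to_url = "/docs/en/" + to_path.replace("/index.md", "/").replace(".md", "/")

assert out_file == "introduction/ya_metrika_task/index.html"
assert to_url == "/docs/en/introduction/history/"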
@ -42,33 +45,48 @@ def build_redirect_html(args, base_prefix, lang, output_dir, from_path, to_path)
|
||||
|
||||
|
||||
def build_docs_redirects(args):
|
||||
with open(os.path.join(args.docs_dir, 'redirects.txt'), 'r') as f:
|
||||
with open(os.path.join(args.docs_dir, "redirects.txt"), "r") as f:
|
||||
for line in f:
|
||||
for lang in args.lang.split(','):
|
||||
from_path, to_path = line.split(' ', 1)
|
||||
build_redirect_html(args, 'docs', lang, args.docs_output_dir, from_path, to_path)
|
||||
for lang in args.lang.split(","):
|
||||
from_path, to_path = line.split(" ", 1)
|
||||
build_redirect_html(
|
||||
args, "docs", lang, args.docs_output_dir, from_path, to_path
|
||||
)
|
||||
|
||||
|
||||
def build_blog_redirects(args):
|
||||
for lang in args.blog_lang.split(','):
|
||||
redirects_path = os.path.join(args.blog_dir, lang, 'redirects.txt')
|
||||
for lang in args.blog_lang.split(","):
|
||||
redirects_path = os.path.join(args.blog_dir, lang, "redirects.txt")
|
||||
if os.path.exists(redirects_path):
|
||||
with open(redirects_path, 'r') as f:
|
||||
with open(redirects_path, "r") as f:
|
||||
for line in f:
|
||||
from_path, to_path = line.split(' ', 1)
|
||||
build_redirect_html(args, 'blog', lang, args.blog_output_dir, from_path, to_path)
|
||||
from_path, to_path = line.split(" ", 1)
|
||||
build_redirect_html(
|
||||
args, "blog", lang, args.blog_output_dir, from_path, to_path
|
||||
)
|
||||
|
||||
|
||||
def build_static_redirects(args):
|
||||
for static_redirect in [
|
||||
('benchmark.html', '/benchmark/dbms/'),
|
||||
('benchmark_hardware.html', '/benchmark/hardware/'),
|
||||
('tutorial.html', '/docs/en/getting_started/tutorial/',),
|
||||
('reference_en.html', '/docs/en/single/', ),
|
||||
('reference_ru.html', '/docs/ru/single/',),
|
||||
('docs/index.html', '/docs/en/',),
|
||||
("benchmark.html", "/benchmark/dbms/"),
|
||||
("benchmark_hardware.html", "/benchmark/hardware/"),
|
||||
(
|
||||
"tutorial.html",
|
||||
"/docs/en/getting_started/tutorial/",
|
||||
),
|
||||
(
|
||||
"reference_en.html",
|
||||
"/docs/en/single/",
|
||||
),
|
||||
(
|
||||
"reference_ru.html",
|
||||
"/docs/ru/single/",
|
||||
),
|
||||
(
|
||||
"docs/index.html",
|
||||
"/docs/en/",
|
||||
),
|
||||
]:
|
||||
write_redirect_html(
|
||||
os.path.join(args.output_dir, static_redirect[0]),
|
||||
static_redirect[1]
|
||||
os.path.join(args.output_dir, static_redirect[0]), static_redirect[1]
|
||||
)
|
||||
|
@ -12,7 +12,8 @@ import test
|
||||
import util
|
||||
import website
|
||||
|
||||
TEMPORARY_FILE_NAME = 'single.md'
|
||||
TEMPORARY_FILE_NAME = "single.md"
|
||||
|
||||
|
||||
def recursive_values(item):
|
||||
if isinstance(item, dict):
|
||||
@ -25,11 +26,14 @@ def recursive_values(item):
|
||||
yield item
|
||||
|
||||
|
||||
-anchor_not_allowed_chars = re.compile(r'[^\w\-]')
-def generate_anchor_from_path(path):
-    return re.sub(anchor_not_allowed_chars, '-', path)
-
-absolute_link = re.compile(r'^https?://')
+anchor_not_allowed_chars = re.compile(r"[^\w\-]")
+
+
+def generate_anchor_from_path(path):
+    return re.sub(anchor_not_allowed_chars, "-", path)
+
+
+absolute_link = re.compile(r"^https?://")

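A standalone illustration of the anchor scheme (the file name is invented): every character that is not a word character or a hyphen is replaced with "-", so a relative file path becomes a valid HTML anchor name.

import re

anchor_not_allowed_chars = re.compile(r"[^\w\-]")

def generate_anchor_from_path(path):
    return re.sub(anchor_not_allowed_chars, "-", path)

assert generate_anchor_from_path("sql-reference/statements/select.md") == (
    "sql-reference-statements-select-md"
)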
def replace_link(match, path):
|
||||
@ -40,46 +44,55 @@ def replace_link(match, path):
|
||||
if re.search(absolute_link, link):
|
||||
return match.group(0)
|
||||
|
||||
if link.endswith('/'):
|
||||
link = link[0:-1] + '.md'
|
||||
if link.endswith("/"):
|
||||
link = link[0:-1] + ".md"
|
||||
|
||||
return '{}(#{})'.format(title, generate_anchor_from_path(os.path.normpath(os.path.join(os.path.dirname(path), link))))
|
||||
return "{}(#{})".format(
|
||||
title,
|
||||
generate_anchor_from_path(
|
||||
os.path.normpath(os.path.join(os.path.dirname(path), link))
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
# Concatenates Markdown files to a single file.
|
||||
def concatenate(lang, docs_path, single_page_file, nav):
|
||||
lang_path = os.path.join(docs_path, lang)
|
||||
|
||||
proj_config = f'{docs_path}/toc_{lang}.yml'
|
||||
proj_config = f"{docs_path}/toc_{lang}.yml"
|
||||
if os.path.exists(proj_config):
|
||||
with open(proj_config) as cfg_file:
|
||||
nav = yaml.full_load(cfg_file.read())['nav']
|
||||
nav = yaml.full_load(cfg_file.read())["nav"]
|
||||
|
||||
files_to_concatenate = list(recursive_values(nav))
|
||||
files_count = len(files_to_concatenate)
|
||||
logging.info(f'{files_count} files will be concatenated into single md-file for {lang}.')
|
||||
logging.debug('Concatenating: ' + ', '.join(files_to_concatenate))
|
||||
assert files_count > 0, f'Empty single-page for {lang}'
|
||||
logging.info(
|
||||
f"{files_count} files will be concatenated into single md-file for {lang}."
|
||||
)
|
||||
logging.debug("Concatenating: " + ", ".join(files_to_concatenate))
|
||||
assert files_count > 0, f"Empty single-page for {lang}"
|
||||
|
||||
link_regexp = re.compile(r'(\[[^\]]+\])\(([^)#]+)(?:#[^\)]+)?\)')
|
||||
link_regexp = re.compile(r"(\[[^\]]+\])\(([^)#]+)(?:#[^\)]+)?\)")
|
||||
|
||||
for path in files_to_concatenate:
|
||||
try:
|
||||
with open(os.path.join(lang_path, path)) as f:
|
||||
# Insert a horizontal ruler. Then insert an anchor that we will link to. Its name will be a path to the .md file.
|
||||
single_page_file.write('\n______\n<a name="%s"></a>\n' % generate_anchor_from_path(path))
|
||||
single_page_file.write(
|
||||
'\n______\n<a name="%s"></a>\n' % generate_anchor_from_path(path)
|
||||
)
|
||||
|
||||
in_metadata = False
|
||||
for line in f:
|
||||
# Skip YAML metadata.
|
||||
if line == '---\n':
|
||||
if line == "---\n":
|
||||
in_metadata = not in_metadata
|
||||
continue
|
||||
|
||||
if not in_metadata:
|
||||
# Increase the level of headers.
|
||||
if line.startswith('#'):
|
||||
line = '#' + line
|
||||
if line.startswith("#"):
|
||||
line = "#" + line
|
||||
|
||||
# Replace links within the docs.
|
||||
|
||||
@ -87,14 +100,19 @@ def concatenate(lang, docs_path, single_page_file, nav):
|
||||
line = re.sub(
|
||||
link_regexp,
|
||||
lambda match: replace_link(match, path),
|
||||
line)
|
||||
line,
|
||||
)
|
||||
|
||||
# If failed to replace the relative link, print to log
|
||||
# But with some exceptions:
|
||||
# - "../src/" -- for cmake-in-clickhouse.md (link to sources)
|
||||
# - "../usr/share" -- changelog entry that has "../usr/share/zoneinfo"
|
||||
if '../' in line and (not '../usr/share' in line) and (not '../src/' in line):
|
||||
logging.info('Failed to resolve relative link:')
|
||||
if (
|
||||
"../" in line
|
||||
and (not "../usr/share" in line)
|
||||
and (not "../src/" in line)
|
||||
):
|
||||
logging.info("Failed to resolve relative link:")
|
||||
logging.info(path)
|
||||
logging.info(line)
|
||||
|
||||
@ -105,9 +123,11 @@ def concatenate(lang, docs_path, single_page_file, nav):
|
||||
|
||||
single_page_file.flush()
|
||||
|
||||
|
||||
def get_temporary_file_name(lang, args):
|
||||
return os.path.join(args.docs_dir, lang, TEMPORARY_FILE_NAME)
|
||||
|
||||
|
||||
def remove_temporary_files(lang, args):
|
||||
single_md_path = get_temporary_file_name(lang, args)
|
||||
if os.path.exists(single_md_path):
|
||||
@ -115,14 +135,14 @@ def remove_temporary_files(lang, args):
|
||||
|
||||
|
||||
def build_single_page_version(lang, args, nav, cfg):
|
||||
logging.info(f'Building single page version for {lang}')
|
||||
os.environ['SINGLE_PAGE'] = '1'
|
||||
extra = cfg.data['extra']
|
||||
extra['single_page'] = True
|
||||
extra['is_amp'] = False
|
||||
logging.info(f"Building single page version for {lang}")
|
||||
os.environ["SINGLE_PAGE"] = "1"
|
||||
extra = cfg.data["extra"]
|
||||
extra["single_page"] = True
|
||||
extra["is_amp"] = False
|
||||
|
||||
single_md_path = get_temporary_file_name(lang, args)
|
||||
with open(single_md_path, 'w') as single_md:
|
||||
with open(single_md_path, "w") as single_md:
|
||||
concatenate(lang, args.docs_dir, single_md, nav)
|
||||
|
||||
with util.temp_dir() as site_temp:
|
||||
@ -132,72 +152,83 @@ def build_single_page_version(lang, args, nav, cfg):
|
||||
shutil.copytree(docs_src_lang, docs_temp_lang)
|
||||
for root, _, filenames in os.walk(docs_temp_lang):
|
||||
for filename in filenames:
|
||||
if filename != 'single.md' and filename.endswith('.md'):
|
||||
if filename != "single.md" and filename.endswith(".md"):
|
||||
os.unlink(os.path.join(root, filename))
|
||||
|
||||
cfg.load_dict({
|
||||
'docs_dir': docs_temp_lang,
|
||||
'site_dir': site_temp,
|
||||
'extra': extra,
|
||||
'nav': [
|
||||
{cfg.data.get('site_name'): 'single.md'}
|
||||
]
|
||||
})
|
||||
cfg.load_dict(
|
||||
{
|
||||
"docs_dir": docs_temp_lang,
|
||||
"site_dir": site_temp,
|
||||
"extra": extra,
|
||||
"nav": [{cfg.data.get("site_name"): "single.md"}],
|
||||
}
|
||||
)
|
||||
|
||||
if not args.test_only:
|
||||
mkdocs.commands.build.build(cfg)
|
||||
|
||||
single_page_output_path = os.path.join(args.docs_dir, args.docs_output_dir, lang, 'single')
|
||||
single_page_output_path = os.path.join(
|
||||
args.docs_dir, args.docs_output_dir, lang, "single"
|
||||
)
|
||||
|
||||
if os.path.exists(single_page_output_path):
|
||||
shutil.rmtree(single_page_output_path)
|
||||
|
||||
shutil.copytree(
|
||||
os.path.join(site_temp, 'single'),
|
||||
single_page_output_path
|
||||
os.path.join(site_temp, "single"), single_page_output_path
|
||||
)
|
||||
|
||||
single_page_index_html = os.path.join(single_page_output_path, 'index.html')
|
||||
single_page_content_js = os.path.join(single_page_output_path, 'content.js')
|
||||
single_page_index_html = os.path.join(
|
||||
single_page_output_path, "index.html"
|
||||
)
|
||||
single_page_content_js = os.path.join(
|
||||
single_page_output_path, "content.js"
|
||||
)
|
||||
|
||||
with open(single_page_index_html, 'r') as f:
|
||||
sp_prefix, sp_js, sp_suffix = f.read().split('<!-- BREAK -->')
|
||||
with open(single_page_index_html, "r") as f:
|
||||
sp_prefix, sp_js, sp_suffix = f.read().split("<!-- BREAK -->")
|
||||
|
||||
with open(single_page_index_html, 'w') as f:
|
||||
with open(single_page_index_html, "w") as f:
|
||||
f.write(sp_prefix)
|
||||
f.write(sp_suffix)
|
||||
|
||||
with open(single_page_content_js, 'w') as f:
|
||||
with open(single_page_content_js, "w") as f:
|
||||
if args.minify:
|
||||
import jsmin
|
||||
|
||||
sp_js = jsmin.jsmin(sp_js)
|
||||
f.write(sp_js)
|
||||
|
||||
logging.info(f'Re-building single page for {lang} pdf/test')
|
||||
logging.info(f"Re-building single page for {lang} pdf/test")
|
||||
with util.temp_dir() as test_dir:
|
||||
extra['single_page'] = False
|
||||
cfg.load_dict({
|
||||
'docs_dir': docs_temp_lang,
|
||||
'site_dir': test_dir,
|
||||
'extra': extra,
|
||||
'nav': [
|
||||
{cfg.data.get('site_name'): 'single.md'}
|
||||
]
|
||||
})
|
||||
extra["single_page"] = False
|
||||
cfg.load_dict(
|
||||
{
|
||||
"docs_dir": docs_temp_lang,
|
||||
"site_dir": test_dir,
|
||||
"extra": extra,
|
||||
"nav": [{cfg.data.get("site_name"): "single.md"}],
|
||||
}
|
||||
)
|
||||
mkdocs.commands.build.build(cfg)
|
||||
|
||||
css_in = ' '.join(website.get_css_in(args))
|
||||
js_in = ' '.join(website.get_js_in(args))
|
||||
subprocess.check_call(f'cat {css_in} > {test_dir}/css/base.css', shell=True)
|
||||
subprocess.check_call(f'cat {js_in} > {test_dir}/js/base.js', shell=True)
|
||||
css_in = " ".join(website.get_css_in(args))
|
||||
js_in = " ".join(website.get_js_in(args))
|
||||
subprocess.check_call(
|
||||
f"cat {css_in} > {test_dir}/css/base.css", shell=True
|
||||
)
|
||||
subprocess.check_call(
|
||||
f"cat {js_in} > {test_dir}/js/base.js", shell=True
|
||||
)
|
||||
|
||||
if args.save_raw_single_page:
|
||||
shutil.copytree(test_dir, args.save_raw_single_page)
|
||||
|
||||
logging.info(f'Running tests for {lang}')
|
||||
logging.info(f"Running tests for {lang}")
|
||||
test.test_single_page(
|
||||
os.path.join(test_dir, 'single', 'index.html'), lang)
|
||||
os.path.join(test_dir, "single", "index.html"), lang
|
||||
)
|
||||
|
||||
logging.info(f'Finished building single page version for {lang}')
|
||||
logging.info(f"Finished building single page version for {lang}")
|
||||
|
||||
remove_temporary_files(lang, args)
|
||||
|
@ -8,14 +8,11 @@ import subprocess
|
||||
|
||||
|
||||
def test_single_page(input_path, lang):
|
||||
if not (lang == 'en'):
|
||||
if not (lang == "en"):
|
||||
return
|
||||
|
||||
with open(input_path) as f:
|
||||
soup = bs4.BeautifulSoup(
|
||||
f,
|
||||
features='html.parser'
|
||||
)
|
||||
soup = bs4.BeautifulSoup(f, features="html.parser")
|
||||
|
||||
anchor_points = set()
|
||||
|
||||
@ -23,30 +20,27 @@ def test_single_page(input_path, lang):
|
||||
links_to_nowhere = 0
|
||||
|
||||
for tag in soup.find_all():
|
||||
for anchor_point in [tag.attrs.get('name'), tag.attrs.get('id')]:
|
||||
for anchor_point in [tag.attrs.get("name"), tag.attrs.get("id")]:
|
||||
if anchor_point:
|
||||
anchor_points.add(anchor_point)
|
||||
|
||||
for tag in soup.find_all():
|
||||
href = tag.attrs.get('href')
|
||||
if href and href.startswith('#') and href != '#':
|
||||
href = tag.attrs.get("href")
|
||||
if href and href.startswith("#") and href != "#":
|
||||
if href[1:] not in anchor_points:
|
||||
links_to_nowhere += 1
|
||||
logging.info("Tag %s", tag)
|
||||
logging.info('Link to nowhere: %s' % href)
|
||||
logging.info("Link to nowhere: %s" % href)
|
||||
|
||||
if links_to_nowhere:
|
||||
logging.error(f'Found {links_to_nowhere} links to nowhere in {lang}')
|
||||
logging.error(f"Found {links_to_nowhere} links to nowhere in {lang}")
|
||||
sys.exit(1)
|
||||
|
||||
if len(anchor_points) <= 10:
|
||||
logging.error('Html parsing is probably broken')
|
||||
logging.error("Html parsing is probably broken")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
logging.basicConfig(
|
||||
level=logging.DEBUG,
|
||||
stream=sys.stderr
|
||||
)
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.DEBUG, stream=sys.stderr)
|
||||
test_single_page(sys.argv[1], sys.argv[2])
|
||||
|
@ -15,7 +15,7 @@ import yaml
|
||||
|
||||
@contextlib.contextmanager
|
||||
def temp_dir():
|
||||
path = tempfile.mkdtemp(dir=os.environ.get('TEMP'))
|
||||
path = tempfile.mkdtemp(dir=os.environ.get("TEMP"))
|
||||
try:
|
||||
yield path
|
||||
finally:
|
||||
@ -34,7 +34,7 @@ def cd(new_cwd):
|
||||
|
||||
 def get_free_port():
     with contextlib.closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
-        s.bind(('', 0))
+        s.bind(("", 0))
         s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
         return s.getsockname()[1]

@ -61,12 +61,12 @@ def read_md_file(path):
|
||||
meta_text = []
|
||||
content = []
|
||||
if os.path.exists(path):
|
||||
with open(path, 'r') as f:
|
||||
with open(path, "r") as f:
|
||||
for line in f:
|
||||
if line.startswith('---'):
|
||||
if line.startswith("---"):
|
||||
if in_meta:
|
||||
in_meta = False
|
||||
meta = yaml.full_load(''.join(meta_text))
|
||||
meta = yaml.full_load("".join(meta_text))
|
||||
else:
|
||||
in_meta = True
|
||||
else:
|
||||
@ -74,7 +74,7 @@ def read_md_file(path):
|
||||
meta_text.append(line)
|
||||
else:
|
||||
content.append(line)
|
||||
return meta, ''.join(content)
|
||||
return meta, "".join(content)
|
||||
|
||||
|
||||
def write_md_file(path, meta, content):
|
||||
@ -82,13 +82,13 @@ def write_md_file(path, meta, content):
|
||||
if not os.path.exists(dirname):
|
||||
os.makedirs(dirname)
|
||||
|
||||
with open(path, 'w') as f:
|
||||
with open(path, "w") as f:
|
||||
if meta:
|
||||
print('---', file=f)
|
||||
print("---", file=f)
|
||||
yaml.dump(meta, f)
|
||||
print('---', file=f)
|
||||
if not content.startswith('\n'):
|
||||
print('', file=f)
|
||||
print("---", file=f)
|
||||
if not content.startswith("\n"):
|
||||
print("", file=f)
|
||||
f.write(content)
|
||||
|
||||
|
||||
@ -100,7 +100,7 @@ def represent_ordereddict(dumper, data):
|
||||
|
||||
value.append((node_key, node_value))
|
||||
|
||||
return yaml.nodes.MappingNode(u'tag:yaml.org,2002:map', value)
|
||||
return yaml.nodes.MappingNode("tag:yaml.org,2002:map", value)
|
||||
|
||||
|
||||
yaml.add_representer(collections.OrderedDict, represent_ordereddict)
|
||||
@ -109,30 +109,31 @@ yaml.add_representer(collections.OrderedDict, represent_ordereddict)
|
||||
 def init_jinja2_filters(env):
     import amp
     import website

     chunk_size = 10240
-    env.filters['chunks'] = lambda line: [line[i:i + chunk_size] for i in range(0, len(line), chunk_size)]
-    env.filters['html_to_amp'] = amp.html_to_amp
-    env.filters['adjust_markdown_html'] = website.adjust_markdown_html
-    env.filters['to_rfc882'] = lambda d: datetime.datetime.strptime(d, '%Y-%m-%d').strftime('%a, %d %b %Y %H:%M:%S GMT')
+    env.filters["chunks"] = lambda line: [
+        line[i : i + chunk_size] for i in range(0, len(line), chunk_size)
+    ]
+    env.filters["html_to_amp"] = amp.html_to_amp
+    env.filters["adjust_markdown_html"] = website.adjust_markdown_html
+    env.filters["to_rfc882"] = lambda d: datetime.datetime.strptime(
+        d, "%Y-%m-%d"
+    ).strftime("%a, %d %b %Y %H:%M:%S GMT")

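For reference, a standalone sketch of the "chunks" filter above (the chunk size matches the one used here; the input string is made up): it slices a long string into fixed-size pieces so templates can emit it piecewise.

chunk_size = 10240
chunks = lambda line: [line[i : i + chunk_size] for i in range(0, len(line), chunk_size)]

text = "x" * 25000
assert [len(part) for part in chunks(text)] == [10240, 10240, 4520]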
def init_jinja2_env(args):
|
||||
import mdx_clickhouse
|
||||
|
||||
env = jinja2.Environment(
|
||||
loader=jinja2.FileSystemLoader([
|
||||
args.website_dir,
|
||||
os.path.join(args.docs_dir, '_includes')
|
||||
]),
|
||||
extensions=[
|
||||
'jinja2.ext.i18n',
|
||||
'jinja2_highlight.HighlightExtension'
|
||||
]
|
||||
loader=jinja2.FileSystemLoader(
|
||||
[args.website_dir, os.path.join(args.docs_dir, "_includes")]
|
||||
),
|
||||
extensions=["jinja2.ext.i18n", "jinja2_highlight.HighlightExtension"],
|
||||
)
|
||||
env.extend(jinja2_highlight_cssclass='syntax p-3 my-3')
|
||||
translations_dir = os.path.join(args.website_dir, 'locale')
|
||||
env.extend(jinja2_highlight_cssclass="syntax p-3 my-3")
|
||||
translations_dir = os.path.join(args.website_dir, "locale")
|
||||
env.install_gettext_translations(
|
||||
mdx_clickhouse.get_translations(translations_dir, 'en'),
|
||||
newstyle=True
|
||||
mdx_clickhouse.get_translations(translations_dir, "en"), newstyle=True
|
||||
)
|
||||
init_jinja2_filters(env)
|
||||
return env
|
||||
|
@ -17,108 +17,112 @@ import util
|
||||
|
||||
|
||||
def handle_iframe(iframe, soup):
|
||||
allowed_domains = ['https://www.youtube.com/', 'https://datalens.yandex/']
|
||||
allowed_domains = ["https://www.youtube.com/", "https://datalens.yandex/"]
|
||||
illegal_domain = True
|
||||
iframe_src = iframe.attrs['src']
|
||||
iframe_src = iframe.attrs["src"]
|
||||
for domain in allowed_domains:
|
||||
if iframe_src.startswith(domain):
|
||||
illegal_domain = False
|
||||
break
|
||||
if illegal_domain:
|
||||
raise RuntimeError(f'iframe from illegal domain: {iframe_src}')
|
||||
wrapper = soup.new_tag('div')
|
||||
wrapper.attrs['class'] = ['embed-responsive', 'embed-responsive-16by9']
|
||||
raise RuntimeError(f"iframe from illegal domain: {iframe_src}")
|
||||
wrapper = soup.new_tag("div")
|
||||
wrapper.attrs["class"] = ["embed-responsive", "embed-responsive-16by9"]
|
||||
iframe.insert_before(wrapper)
|
||||
iframe.extract()
|
||||
wrapper.insert(0, iframe)
|
||||
if 'width' in iframe.attrs:
|
||||
del iframe.attrs['width']
|
||||
if 'height' in iframe.attrs:
|
||||
del iframe.attrs['height']
|
||||
iframe.attrs['allow'] = 'accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture'
|
||||
iframe.attrs['class'] = 'embed-responsive-item'
|
||||
iframe.attrs['frameborder'] = '0'
|
||||
iframe.attrs['allowfullscreen'] = '1'
|
||||
if "width" in iframe.attrs:
|
||||
del iframe.attrs["width"]
|
||||
if "height" in iframe.attrs:
|
||||
del iframe.attrs["height"]
|
||||
iframe.attrs[
|
||||
"allow"
|
||||
] = "accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture"
|
||||
iframe.attrs["class"] = "embed-responsive-item"
|
||||
iframe.attrs["frameborder"] = "0"
|
||||
iframe.attrs["allowfullscreen"] = "1"
|
||||
|
||||
|
||||
def adjust_markdown_html(content):
|
||||
soup = bs4.BeautifulSoup(
|
||||
content,
|
||||
features='html.parser'
|
||||
)
|
||||
soup = bs4.BeautifulSoup(content, features="html.parser")
|
||||
|
||||
for a in soup.find_all('a'):
|
||||
a_class = a.attrs.get('class')
|
||||
a_href = a.attrs.get('href')
|
||||
if a_class and 'headerlink' in a_class:
|
||||
a.string = '\xa0'
|
||||
if a_href and a_href.startswith('http'):
|
||||
a.attrs['target'] = '_blank'
|
||||
for a in soup.find_all("a"):
|
||||
a_class = a.attrs.get("class")
|
||||
a_href = a.attrs.get("href")
|
||||
if a_class and "headerlink" in a_class:
|
||||
a.string = "\xa0"
|
||||
if a_href and a_href.startswith("http"):
|
||||
a.attrs["target"] = "_blank"
|
||||
|
||||
for code in soup.find_all('code'):
|
||||
code_class = code.attrs.get('class')
|
||||
for code in soup.find_all("code"):
|
||||
code_class = code.attrs.get("class")
|
||||
if code_class:
|
||||
code.attrs['class'] = code_class + ['syntax']
|
||||
code.attrs["class"] = code_class + ["syntax"]
|
||||
else:
|
||||
code.attrs['class'] = 'syntax'
|
||||
code.attrs["class"] = "syntax"
|
||||
|
||||
for iframe in soup.find_all('iframe'):
|
||||
for iframe in soup.find_all("iframe"):
|
||||
handle_iframe(iframe, soup)
|
||||
|
||||
for img in soup.find_all('img'):
|
||||
if img.attrs.get('alt') == 'iframe':
|
||||
img.name = 'iframe'
|
||||
img.string = ''
|
||||
for img in soup.find_all("img"):
|
||||
if img.attrs.get("alt") == "iframe":
|
||||
img.name = "iframe"
|
||||
img.string = ""
|
||||
handle_iframe(img, soup)
|
||||
continue
|
||||
img_class = img.attrs.get('class')
|
||||
img_class = img.attrs.get("class")
|
||||
if img_class:
|
||||
img.attrs['class'] = img_class + ['img-fluid']
|
||||
img.attrs["class"] = img_class + ["img-fluid"]
|
||||
else:
|
||||
img.attrs['class'] = 'img-fluid'
|
||||
img.attrs["class"] = "img-fluid"
|
||||
|
||||
for details in soup.find_all('details'):
|
||||
for summary in details.find_all('summary'):
|
||||
for details in soup.find_all("details"):
|
||||
for summary in details.find_all("summary"):
|
||||
if summary.parent != details:
|
||||
summary.extract()
|
||||
details.insert(0, summary)
|
||||
|
||||
for dd in soup.find_all('dd'):
|
||||
dd_class = dd.attrs.get('class')
|
||||
for dd in soup.find_all("dd"):
|
||||
dd_class = dd.attrs.get("class")
|
||||
if dd_class:
|
||||
dd.attrs['class'] = dd_class + ['pl-3']
|
||||
dd.attrs["class"] = dd_class + ["pl-3"]
|
||||
else:
|
||||
dd.attrs['class'] = 'pl-3'
|
||||
dd.attrs["class"] = "pl-3"
|
||||
|
||||
for div in soup.find_all('div'):
|
||||
div_class = div.attrs.get('class')
|
||||
is_admonition = div_class and 'admonition' in div.attrs.get('class')
|
||||
for div in soup.find_all("div"):
|
||||
div_class = div.attrs.get("class")
|
||||
is_admonition = div_class and "admonition" in div.attrs.get("class")
|
||||
if is_admonition:
|
||||
for a in div.find_all('a'):
|
||||
a_class = a.attrs.get('class')
|
||||
for a in div.find_all("a"):
|
||||
a_class = a.attrs.get("class")
|
||||
if a_class:
|
||||
a.attrs['class'] = a_class + ['alert-link']
|
||||
a.attrs["class"] = a_class + ["alert-link"]
|
||||
else:
|
||||
a.attrs['class'] = 'alert-link'
|
||||
a.attrs["class"] = "alert-link"
|
||||
|
||||
for p in div.find_all('p'):
|
||||
p_class = p.attrs.get('class')
|
||||
if is_admonition and p_class and ('admonition-title' in p_class):
|
||||
p.attrs['class'] = p_class + ['alert-heading', 'display-4', 'text-reset', 'mb-2']
|
||||
for p in div.find_all("p"):
|
||||
p_class = p.attrs.get("class")
|
||||
if is_admonition and p_class and ("admonition-title" in p_class):
|
||||
p.attrs["class"] = p_class + [
|
||||
"alert-heading",
|
||||
"display-4",
|
||||
"text-reset",
|
||||
"mb-2",
|
||||
]
|
||||
|
||||
if is_admonition:
|
||||
div.attrs['role'] = 'alert'
|
||||
if ('info' in div_class) or ('note' in div_class):
|
||||
mode = 'alert-primary'
|
||||
elif ('attention' in div_class) or ('warning' in div_class):
|
||||
mode = 'alert-warning'
|
||||
elif 'important' in div_class:
|
||||
mode = 'alert-danger'
|
||||
elif 'tip' in div_class:
|
||||
mode = 'alert-info'
|
||||
div.attrs["role"] = "alert"
|
||||
if ("info" in div_class) or ("note" in div_class):
|
||||
mode = "alert-primary"
|
||||
elif ("attention" in div_class) or ("warning" in div_class):
|
||||
mode = "alert-warning"
|
||||
elif "important" in div_class:
|
||||
mode = "alert-danger"
|
||||
elif "tip" in div_class:
|
||||
mode = "alert-info"
|
||||
else:
|
||||
mode = 'alert-secondary'
|
||||
div.attrs['class'] = div_class + ['alert', 'pb-0', 'mb-4', mode]
|
||||
mode = "alert-secondary"
|
||||
div.attrs["class"] = div_class + ["alert", "pb-0", "mb-4", mode]
|
||||
|
||||
return str(soup)
|
||||
|
||||
@ -128,61 +132,63 @@ def minify_html(content):
|
||||
|
||||
|
||||
def build_website(args):
|
||||
logging.info('Building website')
|
||||
logging.info("Building website")
|
||||
env = util.init_jinja2_env(args)
|
||||
|
||||
shutil.copytree(
|
||||
args.website_dir,
|
||||
args.output_dir,
|
||||
ignore=shutil.ignore_patterns(
|
||||
'*.md',
|
||||
'*.sh',
|
||||
'*.css',
|
||||
'*.json',
|
||||
'js/*.js',
|
||||
'build',
|
||||
'docs',
|
||||
'public',
|
||||
'node_modules',
|
||||
'src',
|
||||
'templates',
|
||||
'locale',
|
||||
'.gitkeep'
|
||||
)
|
||||
"*.md",
|
||||
"*.sh",
|
||||
"*.css",
|
||||
"*.json",
|
||||
"js/*.js",
|
||||
"build",
|
||||
"docs",
|
||||
"public",
|
||||
"node_modules",
|
||||
"src",
|
||||
"templates",
|
||||
"locale",
|
||||
".gitkeep",
|
||||
),
|
||||
)
|
||||
|
||||
shutil.copytree(
|
||||
os.path.join(args.website_dir, 'images'),
|
||||
os.path.join(args.output_dir, 'docs', 'images')
|
||||
os.path.join(args.website_dir, "images"),
|
||||
os.path.join(args.output_dir, "docs", "images"),
|
||||
)
|
||||
|
||||
# This file can be requested to check for available ClickHouse releases.
|
||||
shutil.copy2(
|
||||
os.path.join(args.src_dir, 'utils', 'list-versions', 'version_date.tsv'),
|
||||
os.path.join(args.output_dir, 'data', 'version_date.tsv'))
|
||||
os.path.join(args.src_dir, "utils", "list-versions", "version_date.tsv"),
|
||||
os.path.join(args.output_dir, "data", "version_date.tsv"),
|
||||
)
|
||||
|
||||
# This file can be requested to install ClickHouse.
|
||||
shutil.copy2(
|
||||
os.path.join(args.src_dir, 'docs', '_includes', 'install', 'universal.sh'),
|
||||
os.path.join(args.output_dir, 'data', 'install.sh'))
|
||||
os.path.join(args.src_dir, "docs", "_includes", "install", "universal.sh"),
|
||||
os.path.join(args.output_dir, "data", "install.sh"),
|
||||
)
|
||||
|
||||
for root, _, filenames in os.walk(args.output_dir):
|
||||
for filename in filenames:
|
||||
if filename == 'main.html':
|
||||
if filename == "main.html":
|
||||
continue
|
||||
|
||||
path = os.path.join(root, filename)
|
||||
if not filename.endswith('.html'):
|
||||
if not filename.endswith(".html"):
|
||||
continue
|
||||
logging.info('Processing %s', path)
|
||||
with open(path, 'rb') as f:
|
||||
content = f.read().decode('utf-8')
|
||||
logging.info("Processing %s", path)
|
||||
with open(path, "rb") as f:
|
||||
content = f.read().decode("utf-8")
|
||||
|
||||
template = env.from_string(content)
|
||||
content = template.render(args.__dict__)
|
||||
|
||||
with open(path, 'wb') as f:
|
||||
f.write(content.encode('utf-8'))
|
||||
with open(path, "wb") as f:
|
||||
f.write(content.encode("utf-8"))
|
||||
|
||||
|
||||
def get_css_in(args):
|
||||
@ -193,7 +199,7 @@ def get_css_in(args):
|
||||
f"'{args.website_dir}/css/blog.css'",
|
||||
f"'{args.website_dir}/css/docs.css'",
|
||||
f"'{args.website_dir}/css/highlight.css'",
|
||||
f"'{args.website_dir}/css/main.css'"
|
||||
f"'{args.website_dir}/css/main.css'",
|
||||
]
|
||||
|
||||
|
||||
@ -207,42 +213,41 @@ def get_js_in(args):
|
||||
f"'{args.website_dir}/js/index.js'",
|
||||
f"'{args.website_dir}/js/docsearch.js'",
|
||||
f"'{args.website_dir}/js/docs.js'",
|
||||
f"'{args.website_dir}/js/main.js'"
|
||||
f"'{args.website_dir}/js/main.js'",
|
||||
]
|
||||
|
||||
|
||||
def minify_file(path, css_digest, js_digest):
|
||||
if not (
|
||||
path.endswith('.html') or
|
||||
path.endswith('.css')
|
||||
):
|
||||
if not (path.endswith(".html") or path.endswith(".css")):
|
||||
return
|
||||
|
||||
logging.info('Minifying %s', path)
|
||||
with open(path, 'rb') as f:
|
||||
content = f.read().decode('utf-8')
|
||||
if path.endswith('.html'):
|
||||
logging.info("Minifying %s", path)
|
||||
with open(path, "rb") as f:
|
||||
content = f.read().decode("utf-8")
|
||||
if path.endswith(".html"):
|
||||
content = minify_html(content)
|
||||
content = content.replace('base.css?css_digest', f'base.css?{css_digest}')
|
||||
content = content.replace('base.js?js_digest', f'base.js?{js_digest}')
|
||||
# TODO: restore cssmin
|
||||
# elif path.endswith('.css'):
|
||||
# content = cssmin.cssmin(content)
|
||||
# TODO: restore jsmin
|
||||
# elif path.endswith('.js'):
|
||||
# content = jsmin.jsmin(content)
|
||||
with open(path, 'wb') as f:
|
||||
f.write(content.encode('utf-8'))
|
||||
content = content.replace("base.css?css_digest", f"base.css?{css_digest}")
|
||||
content = content.replace("base.js?js_digest", f"base.js?{js_digest}")
|
||||
# TODO: restore cssmin
|
||||
# elif path.endswith('.css'):
|
||||
# content = cssmin.cssmin(content)
|
||||
# TODO: restore jsmin
|
||||
# elif path.endswith('.js'):
|
||||
# content = jsmin.jsmin(content)
|
||||
with open(path, "wb") as f:
|
||||
f.write(content.encode("utf-8"))
|
||||
|
||||
|
||||
def minify_website(args):
|
||||
css_in = ' '.join(get_css_in(args))
|
||||
css_out = f'{args.output_dir}/docs/css/base.css'
|
||||
os.makedirs(f'{args.output_dir}/docs/css')
|
||||
css_in = " ".join(get_css_in(args))
|
||||
css_out = f"{args.output_dir}/docs/css/base.css"
|
||||
os.makedirs(f"{args.output_dir}/docs/css")
|
||||
|
||||
if args.minify and False: # TODO: return closure
|
||||
command = f"purifycss -w '*algolia*' --min {css_in} '{args.output_dir}/*.html' " \
|
||||
command = (
|
||||
f"purifycss -w '*algolia*' --min {css_in} '{args.output_dir}/*.html' "
|
||||
f"'{args.output_dir}/docs/en/**/*.html' '{args.website_dir}/js/**/*.js' > {css_out}"
|
||||
)
|
||||
logging.info(css_in)
|
||||
logging.info(command)
|
||||
output = subprocess.check_output(command, shell=True)
|
||||
@ -251,51 +256,60 @@ def minify_website(args):
|
||||
else:
|
||||
command = f"cat {css_in}"
|
||||
output = subprocess.check_output(command, shell=True)
|
||||
with open(css_out, 'wb+') as f:
|
||||
with open(css_out, "wb+") as f:
|
||||
f.write(output)
|
||||
|
||||
with open(css_out, 'rb') as f:
|
||||
with open(css_out, "rb") as f:
|
||||
css_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8]
|
||||
|
||||
js_in = ' '.join(get_js_in(args))
|
||||
js_out = f'{args.output_dir}/docs/js/base.js'
|
||||
os.makedirs(f'{args.output_dir}/docs/js')
|
||||
js_in = " ".join(get_js_in(args))
|
||||
js_out = f"{args.output_dir}/docs/js/base.js"
|
||||
os.makedirs(f"{args.output_dir}/docs/js")
|
||||
|
||||
if args.minify and False: # TODO: return closure
|
||||
js_in = [js[1:-1] for js in js_in]
|
||||
closure_args = [
|
||||
'--js', *js_in, '--js_output_file', js_out,
|
||||
'--compilation_level', 'SIMPLE',
|
||||
'--dependency_mode', 'NONE',
|
||||
'--third_party', '--use_types_for_optimization',
|
||||
'--isolation_mode', 'IIFE'
|
||||
"--js",
|
||||
*js_in,
|
||||
"--js_output_file",
|
||||
js_out,
|
||||
"--compilation_level",
|
||||
"SIMPLE",
|
||||
"--dependency_mode",
|
||||
"NONE",
|
||||
"--third_party",
|
||||
"--use_types_for_optimization",
|
||||
"--isolation_mode",
|
||||
"IIFE",
|
||||
]
|
||||
logging.info(closure_args)
|
||||
if closure.run(*closure_args):
|
||||
raise RuntimeError('failed to run closure compiler')
|
||||
with open(js_out, 'r') as f:
|
||||
raise RuntimeError("failed to run closure compiler")
|
||||
with open(js_out, "r") as f:
|
||||
js_content = jsmin.jsmin(f.read())
|
||||
with open(js_out, 'w') as f:
|
||||
with open(js_out, "w") as f:
|
||||
f.write(js_content)
|
||||
|
||||
else:
|
||||
command = f"cat {js_in}"
|
||||
output = subprocess.check_output(command, shell=True)
|
||||
with open(js_out, 'wb+') as f:
|
||||
with open(js_out, "wb+") as f:
|
||||
f.write(output)
|
||||
|
||||
with open(js_out, 'rb') as f:
|
||||
with open(js_out, "rb") as f:
|
||||
js_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8]
|
||||
logging.info(js_digest)
|
||||
|
||||
if args.minify:
|
||||
logging.info('Minifying website')
|
||||
logging.info("Minifying website")
|
||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||
futures = []
|
||||
for root, _, filenames in os.walk(args.output_dir):
|
||||
for filename in filenames:
|
||||
path = os.path.join(root, filename)
|
||||
futures.append(executor.submit(minify_file, path, css_digest, js_digest))
|
||||
futures.append(
|
||||
executor.submit(minify_file, path, css_digest, js_digest)
|
||||
)
|
||||
for future in futures:
|
||||
exc = future.exception()
|
||||
if exc:
|
||||
@ -304,24 +318,28 @@ def minify_website(args):
|
||||
|
||||
|
||||
def process_benchmark_results(args):
|
||||
benchmark_root = os.path.join(args.website_dir, 'benchmark')
|
||||
benchmark_root = os.path.join(args.website_dir, "benchmark")
|
||||
required_keys = {
|
||||
'dbms': ['result'],
|
||||
'hardware': ['result', 'system', 'system_full', 'kind']
|
||||
"dbms": ["result"],
|
||||
"hardware": ["result", "system", "system_full", "kind"],
|
||||
}
|
||||
for benchmark_kind in ['dbms', 'hardware']:
|
||||
for benchmark_kind in ["dbms", "hardware"]:
|
||||
results = []
|
||||
results_root = os.path.join(benchmark_root, benchmark_kind, 'results')
|
||||
results_root = os.path.join(benchmark_root, benchmark_kind, "results")
|
||||
for result in sorted(os.listdir(results_root)):
|
||||
result_file = os.path.join(results_root, result)
|
||||
logging.debug(f'Reading benchmark result from {result_file}')
|
||||
with open(result_file, 'r') as f:
|
||||
logging.debug(f"Reading benchmark result from {result_file}")
|
||||
with open(result_file, "r") as f:
|
||||
result = json.loads(f.read())
|
||||
for item in result:
|
||||
for required_key in required_keys[benchmark_kind]:
|
||||
assert required_key in item, f'No "{required_key}" in {result_file}'
|
||||
assert (
|
||||
required_key in item
|
||||
), f'No "{required_key}" in {result_file}'
|
||||
results += result
|
||||
results_js = os.path.join(args.output_dir, 'benchmark', benchmark_kind, 'results.js')
|
||||
with open(results_js, 'w') as f:
|
||||
results_js = os.path.join(
|
||||
args.output_dir, "benchmark", benchmark_kind, "results.js"
|
||||
)
|
||||
with open(results_js, "w") as f:
|
||||
data = json.dumps(results)
|
||||
f.write(f'var results = {data};')
|
||||
f.write(f"var results = {data};")
|
||||
|
@@ -42,6 +42,8 @@ git push
 Run some simple regex-based code style checks with the `utils/check-style/check-style` binary (note that it can be run locally).
 If it fails, fix the style errors following the [code style guide](./style.md).

+Python code is checked with [black](https://github.com/psf/black/).
+
 ### Report Details {#report-details}
 - [Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check.html)
 - `docs_output.txt` records the check errors (invalid tables, etc.); an empty page means there are no errors. [Successful result example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check/output.txt)
@ -7,16 +7,14 @@ import string
|
||||
 TOKEN_TEXT = 1
 TOKEN_VAR = 2

-TOKEN_COLON = ':'
-TOKEN_SEMI = ';'
-TOKEN_OR = '|'
-TOKEN_QUESTIONMARK = '?'
-TOKEN_ROUND_BRACKET_OPEN = '('
-TOKEN_ROUND_BRACKET_CLOSE = ')'
-TOKEN_ASTERISK = '*'
-TOKEN_SLASH = '/'
+TOKEN_COLON = ":"
+TOKEN_SEMI = ";"
+TOKEN_OR = "|"
+TOKEN_QUESTIONMARK = "?"
+TOKEN_ROUND_BRACKET_OPEN = "("
+TOKEN_ROUND_BRACKET_CLOSE = ")"
+TOKEN_ASTERISK = "*"
+TOKEN_SLASH = "/"

class TextValue:
|
||||
@@ -27,9 +25,9 @@ class TextValue:
     def get_slug(self):
         if self.slug is not None:
             return self.slug
-        slug = ''
+        slug = ""
         for c in self.t:
-            slug += c if c in string.ascii_letters else '_'
+            slug += c if c in string.ascii_letters else "_"
         self.slug = slug
         return slug

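For reference, a self-contained sketch of the slug rule used by the fuzzer generator above (the sample text is invented): every character that is not an ASCII letter becomes "_".

import string

def get_slug(text):
    return "".join(c if c in string.ascii_letters else "_" for c in text)

assert get_slug("SELECT 1;") == "SELECT___"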
@ -37,12 +35,12 @@ class TextValue:
|
||||
return f"TextValue_{self.get_slug()}"
|
||||
|
||||
def __repr__(self):
|
||||
return f"TextValue(\"{self.t}\")"
|
||||
return f'TextValue("{self.t}")'
|
||||
|
||||
|
||||
class Var:
|
||||
def __init__(self, id_):
|
||||
self.id_ = id_
|
||||
self.id_ = id_
|
||||
|
||||
def __repr__(self):
|
||||
return f"Var({self.id_})"
|
||||
@ -59,8 +57,8 @@ class Parser:
|
||||
self.cur_tok = None
|
||||
self.includes = []
|
||||
|
||||
self.proto = ''
|
||||
self.cpp = ''
|
||||
self.proto = ""
|
||||
self.cpp = ""
|
||||
|
||||
def parse_file(self, filename):
|
||||
with open(filename) as f:
|
||||
@ -81,7 +79,7 @@ class Parser:
|
||||
if self.text[0] == '"':
|
||||
return self.parse_txt_value()
|
||||
|
||||
if self.text[0] == '$':
|
||||
if self.text[0] == "$":
|
||||
return self.parse_var_value()
|
||||
|
||||
c, self.text = self.text[0], self.text[1:]
|
||||
@ -89,9 +87,9 @@ class Parser:
|
||||
return c
|
||||
|
||||
def parse_var_value(self):
|
||||
i = self.text.find(' ')
|
||||
i = self.text.find(" ")
|
||||
|
||||
id_, self.text = self.text[1:i], self.text[i+1:]
|
||||
id_, self.text = self.text[1:i], self.text[i + 1 :]
|
||||
self.var_id = int(id_)
|
||||
self.cur_tok = TOKEN_VAR
|
||||
return TOKEN_VAR
|
||||
@ -100,12 +98,12 @@ class Parser:
|
||||
if self.text[0] != '"':
|
||||
raise Exception("parse_txt_value: expected quote at the start")
|
||||
|
||||
self.t = ''
|
||||
self.t = ""
|
||||
self.text = self.text[1:]
|
||||
|
||||
while self.text[0] != '"':
|
||||
if self.text[0] == '\\':
|
||||
if self.text[1] == 'x':
|
||||
if self.text[0] == "\\":
|
||||
if self.text[1] == "x":
|
||||
self.t += self.text[:4]
|
||||
self.text = self.text[4:]
|
||||
elif self.text[1] in 'nt\\"':
|
||||
@ -123,7 +121,7 @@ class Parser:
|
||||
|
||||
def skip_ws(self):
|
||||
while self.text and self.text[0] in string.whitespace:
|
||||
if self.text[0] == '\n':
|
||||
if self.text[0] == "\n":
|
||||
self.line += 1
|
||||
self.col = 0
|
||||
self.text = self.text[1:]
|
||||
@ -134,10 +132,9 @@ class Parser:
|
||||
|
||||
def skip_line(self):
|
||||
self.line += 1
|
||||
index = self.text.find('\n')
|
||||
index = self.text.find("\n")
|
||||
self.text = self.text[index:]
|
||||
|
||||
|
||||
def parse_statement(self):
|
||||
if self.skip_ws() is None:
|
||||
return None
|
||||
@ -164,52 +161,54 @@ class Parser:
|
||||
|
||||
def generate(self):
|
||||
self.proto = 'syntax = "proto3";\n\n'
|
||||
self.cpp = '#include <iostream>\n#include <string>\n#include <vector>\n\n#include <libfuzzer/libfuzzer_macro.h>\n\n'
|
||||
self.cpp = "#include <iostream>\n#include <string>\n#include <vector>\n\n#include <libfuzzer/libfuzzer_macro.h>\n\n"
|
||||
|
||||
for incl_file in self.includes:
|
||||
self.cpp += f'#include "{incl_file}"\n'
|
||||
self.cpp += '\n'
|
||||
self.cpp += "\n"
|
||||
|
||||
self.proto += 'message Word {\n'
|
||||
self.proto += '\tenum Value {\n'
|
||||
self.proto += "message Word {\n"
|
||||
self.proto += "\tenum Value {\n"
|
||||
|
||||
self.cpp += 'void GenerateWord(const Word&, std::string&, int);\n\n'
|
||||
self.cpp += "void GenerateWord(const Word&, std::string&, int);\n\n"
|
||||
|
||||
self.cpp += 'void GenerateSentence(const Sentence& stc, std::string &s, int depth) {\n'
|
||||
self.cpp += '\tfor (int i = 0; i < stc.words_size(); i++ ) {\n'
|
||||
self.cpp += '\t\tGenerateWord(stc.words(i), s, ++depth);\n'
|
||||
self.cpp += '\t}\n'
|
||||
self.cpp += '}\n'
|
||||
self.cpp += (
|
||||
"void GenerateSentence(const Sentence& stc, std::string &s, int depth) {\n"
|
||||
)
|
||||
self.cpp += "\tfor (int i = 0; i < stc.words_size(); i++ ) {\n"
|
||||
self.cpp += "\t\tGenerateWord(stc.words(i), s, ++depth);\n"
|
||||
self.cpp += "\t}\n"
|
||||
self.cpp += "}\n"
|
||||
|
||||
self.cpp += 'void GenerateWord(const Word& word, std::string &s, int depth) {\n'
|
||||
self.cpp += "void GenerateWord(const Word& word, std::string &s, int depth) {\n"
|
||||
|
||||
self.cpp += '\tif (depth > 5) return;\n\n'
|
||||
self.cpp += '\tswitch (word.value()) {\n'
|
||||
self.cpp += "\tif (depth > 5) return;\n\n"
|
||||
self.cpp += "\tswitch (word.value()) {\n"
|
||||
|
||||
for idx, chain in enumerate(self.chains):
|
||||
self.proto += f'\t\tvalue_{idx} = {idx};\n'
|
||||
self.proto += f"\t\tvalue_{idx} = {idx};\n"
|
||||
|
||||
self.cpp += f'\t\tcase {idx}: {{\n'
|
||||
self.cpp += f"\t\tcase {idx}: {{\n"
|
||||
num_var = 0
|
||||
for item in chain:
|
||||
if isinstance(item, TextValue):
|
||||
self.cpp += f'\t\t\ts += "{item.t}";\n'
|
||||
elif isinstance(item, Var):
|
||||
self.cpp += f'\t\t\tif (word.inner().words_size() > {num_var})\t\t\t\tGenerateWord(word.inner().words({num_var}), s, ++depth);\n'
|
||||
self.cpp += f"\t\t\tif (word.inner().words_size() > {num_var})\t\t\t\tGenerateWord(word.inner().words({num_var}), s, ++depth);\n"
|
||||
num_var += 1
|
||||
else:
|
||||
raise Exception("unknown token met during generation")
|
||||
self.cpp += '\t\t\tbreak;\n\t\t}\n'
|
||||
self.cpp += '\t\tdefault: break;\n'
|
||||
self.cpp += "\t\t\tbreak;\n\t\t}\n"
|
||||
self.cpp += "\t\tdefault: break;\n"
|
||||
|
||||
self.cpp += '\t}\n'
|
||||
self.cpp += "\t}\n"
|
||||
|
||||
self.proto += '\t}\n'
|
||||
self.proto += '\tValue value = 1;\n'
|
||||
self.proto += '\tSentence inner = 2;\n'
|
||||
self.proto += '}\nmessage Sentence {\n\trepeated Word words = 1;\n}'
|
||||
self.proto += "\t}\n"
|
||||
self.proto += "\tValue value = 1;\n"
|
||||
self.proto += "\tSentence inner = 2;\n"
|
||||
self.proto += "}\nmessage Sentence {\n\trepeated Word words = 1;\n}"
|
||||
|
||||
self.cpp += '}\n'
|
||||
self.cpp += "}\n"
|
||||
return self.cpp, self.proto
|
||||
|
||||
def fatal_parsing_error(self, msg):
|
||||
@ -220,7 +219,7 @@ class Parser:
|
||||
def main(args):
|
||||
input_file, outfile_cpp, outfile_proto = args
|
||||
|
||||
if not outfile_proto.endswith('.proto'):
|
||||
if not outfile_proto.endswith(".proto"):
|
||||
raise Exception("outfile_proto (argv[3]) should end with `.proto`")
|
||||
|
||||
include_filename = outfile_proto[:-6] + ".pb.h"
|
||||
@ -231,17 +230,17 @@ def main(args):
|
||||
|
||||
cpp, proto = p.generate()
|
||||
|
||||
proto = proto.replace('\t', ' ' * 4)
|
||||
cpp = cpp.replace('\t', ' ' * 4)
|
||||
proto = proto.replace("\t", " " * 4)
|
||||
cpp = cpp.replace("\t", " " * 4)
|
||||
|
||||
with open(outfile_cpp, 'w') as f:
|
||||
with open(outfile_cpp, "w") as f:
|
||||
f.write(cpp)
|
||||
|
||||
with open(outfile_proto, 'w') as f:
|
||||
with open(outfile_proto, "w") as f:
|
||||
f.write(proto)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) < 3:
|
||||
print(f"Usage {sys.argv[0]} <input_file> <outfile.cpp> <outfile.proto>")
|
||||
sys.exit(1)
|
||||
|
@ -9,7 +9,9 @@ import re
|
||||
|
||||
parts = {}
|
||||
for s in sys.stdin.read().split():
|
||||
m = re.match('^([0-9]{6})[0-9]{2}_([0-9]{6})[0-9]{2}_([0-9]+)_([0-9]+)_([0-9]+)$', s)
|
||||
m = re.match(
|
||||
"^([0-9]{6})[0-9]{2}_([0-9]{6})[0-9]{2}_([0-9]+)_([0-9]+)_([0-9]+)$", s
|
||||
)
|
||||
if m == None:
|
||||
continue
|
||||
m1 = m.group(1)
|
||||
@ -18,7 +20,7 @@ for s in sys.stdin.read().split():
|
||||
i2 = int(m.group(4))
|
||||
l = int(m.group(5))
|
||||
if m1 != m2:
|
||||
raise Exception('not in single month: ' + s)
|
||||
raise Exception("not in single month: " + s)
|
||||
if m1 not in parts:
|
||||
parts[m1] = []
|
||||
parts[m1].append((i1, i2, l, s))
|
||||
@ -27,13 +29,13 @@ for m, ps in sorted(parts.items()):
|
||||
ps.sort(key=lambda i1_i2_l_s: (i1_i2_l_s[0], -i1_i2_l_s[1], -i1_i2_l_s[2]))
|
||||
(x2, y2, l2, s2) = (-1, -1, -1, -1)
|
||||
for x1, y1, l1, s1 in ps:
|
||||
if x1 >= x2 and y1 <= y2 and l1 < l2 and (x1, y1) != (x2, y2): # 2 contains 1
|
||||
if x1 >= x2 and y1 <= y2 and l1 < l2 and (x1, y1) != (x2, y2): # 2 contains 1
|
||||
pass
|
||||
elif x1 > y2: # 1 is to the right of 2
|
||||
elif x1 > y2: # 1 is to the right of 2
|
||||
if x1 != y2 + 1 and y2 != -1:
|
||||
print() # to see the missing numbers
|
||||
print() # to see the missing numbers
|
||||
(x2, y2, l2, s2) = (x1, y1, l1, s1)
|
||||
print(s1)
|
||||
else:
|
||||
raise Exception('invalid parts intersection: ' + s1 + ' and ' + s2)
|
||||
raise Exception("invalid parts intersection: " + s1 + " and " + s2)
|
||||
print()
|
||||
|
@ -7,8 +7,14 @@ import sys
|
||||
|
||||
from github import Github
|
||||
|
||||
from env_helper import GITHUB_REPOSITORY, TEMP_PATH, REPO_COPY, REPORTS_PATH, GITHUB_SERVER_URL, \
|
||||
GITHUB_RUN_ID
|
||||
from env_helper import (
|
||||
GITHUB_REPOSITORY,
|
||||
TEMP_PATH,
|
||||
REPO_COPY,
|
||||
REPORTS_PATH,
|
||||
GITHUB_SERVER_URL,
|
||||
GITHUB_RUN_ID,
|
||||
)
|
||||
from s3_helper import S3Helper
|
||||
from get_robot_token import get_best_robot_token
|
||||
from pr_info import PRInfo
|
||||
@ -19,19 +25,24 @@ from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickh
|
||||
from stopwatch import Stopwatch
|
||||
from rerun_helper import RerunHelper
|
||||
|
||||
IMAGE_NAME = 'clickhouse/fuzzer'
|
||||
IMAGE_NAME = "clickhouse/fuzzer"
|
||||
|
||||
|
||||
def get_run_command(pr_number, sha, download_url, workspace_path, image):
|
||||
return f'docker run --network=host --volume={workspace_path}:/workspace ' \
|
||||
'--cap-add syslog --cap-add sys_admin --cap-add=SYS_PTRACE ' \
|
||||
f'-e PR_TO_TEST={pr_number} -e SHA_TO_TEST={sha} -e BINARY_URL_TO_DOWNLOAD="{download_url}" '\
|
||||
f'{image}'
|
||||
return (
|
||||
f"docker run --network=host --volume={workspace_path}:/workspace "
|
||||
"--cap-add syslog --cap-add sys_admin --cap-add=SYS_PTRACE "
|
||||
f'-e PR_TO_TEST={pr_number} -e SHA_TO_TEST={sha} -e BINARY_URL_TO_DOWNLOAD="{download_url}" '
|
||||
f"{image}"
|
||||
)
|
||||
|
||||
|
||||
def get_commit(gh, commit_sha):
|
||||
repo = gh.get_repo(GITHUB_REPOSITORY)
|
||||
commit = repo.get_commit(commit_sha)
|
||||
return commit
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
@ -64,7 +75,7 @@ if __name__ == "__main__":
|
||||
raise Exception("No build URLs found")
|
||||
|
||||
for url in urls:
|
||||
if url.endswith('/clickhouse'):
|
||||
if url.endswith("/clickhouse"):
|
||||
build_url = url
|
||||
break
|
||||
else:
|
||||
@ -72,16 +83,20 @@ if __name__ == "__main__":
|
||||
|
||||
logging.info("Got build url %s", build_url)
|
||||
|
||||
workspace_path = os.path.join(temp_path, 'workspace')
|
||||
workspace_path = os.path.join(temp_path, "workspace")
|
||||
if not os.path.exists(workspace_path):
|
||||
os.makedirs(workspace_path)
|
||||
|
||||
run_command = get_run_command(pr_info.number, pr_info.sha, build_url, workspace_path, docker_image)
|
||||
run_command = get_run_command(
|
||||
pr_info.number, pr_info.sha, build_url, workspace_path, docker_image
|
||||
)
|
||||
logging.info("Going to run %s", run_command)
|
||||
|
||||
run_log_path = os.path.join(temp_path, "runlog.log")
|
||||
with open(run_log_path, 'w', encoding='utf-8') as log:
|
||||
with subprocess.Popen(run_command, shell=True, stderr=log, stdout=log) as process:
|
||||
with open(run_log_path, "w", encoding="utf-8") as log:
|
||||
with subprocess.Popen(
|
||||
run_command, shell=True, stderr=log, stdout=log
|
||||
) as process:
|
||||
retcode = process.wait()
|
||||
if retcode == 0:
|
||||
logging.info("Run successfully")
|
||||
@ -90,56 +105,70 @@ if __name__ == "__main__":
|
||||
|
||||
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
|
||||
|
||||
check_name_lower = check_name.lower().replace('(', '').replace(')', '').replace(' ', '')
|
||||
s3_prefix = f'{pr_info.number}/{pr_info.sha}/fuzzer_{check_name_lower}/'
|
||||
check_name_lower = (
|
||||
check_name.lower().replace("(", "").replace(")", "").replace(" ", "")
|
||||
)
|
||||
s3_prefix = f"{pr_info.number}/{pr_info.sha}/fuzzer_{check_name_lower}/"
|
||||
paths = {
|
||||
'runlog.log': run_log_path,
|
||||
'main.log': os.path.join(workspace_path, 'main.log'),
|
||||
'server.log': os.path.join(workspace_path, 'server.log'),
|
||||
'fuzzer.log': os.path.join(workspace_path, 'fuzzer.log'),
|
||||
'report.html': os.path.join(workspace_path, 'report.html'),
|
||||
'core.gz': os.path.join(workspace_path, 'core.gz'),
|
||||
"runlog.log": run_log_path,
|
||||
"main.log": os.path.join(workspace_path, "main.log"),
|
||||
"server.log": os.path.join(workspace_path, "server.log"),
|
||||
"fuzzer.log": os.path.join(workspace_path, "fuzzer.log"),
|
||||
"report.html": os.path.join(workspace_path, "report.html"),
|
||||
"core.gz": os.path.join(workspace_path, "core.gz"),
|
||||
}
|
||||
|
||||
s3_helper = S3Helper('https://s3.amazonaws.com')
|
||||
s3_helper = S3Helper("https://s3.amazonaws.com")
|
||||
for f in paths:
|
||||
try:
|
||||
paths[f] = s3_helper.upload_test_report_to_s3(paths[f], s3_prefix + '/' + f)
|
||||
paths[f] = s3_helper.upload_test_report_to_s3(paths[f], s3_prefix + "/" + f)
|
||||
except Exception as ex:
|
||||
logging.info("Exception uploading file %s text %s", f, ex)
|
||||
paths[f] = ''
|
||||
paths[f] = ""
|
||||
|
||||
report_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}"
|
||||
if paths['runlog.log']:
|
||||
report_url = paths['runlog.log']
|
||||
if paths['main.log']:
|
||||
report_url = paths['main.log']
|
||||
if paths['server.log']:
|
||||
report_url = paths['server.log']
|
||||
if paths['fuzzer.log']:
|
||||
report_url = paths['fuzzer.log']
|
||||
if paths['report.html']:
|
||||
report_url = paths['report.html']
|
||||
if paths["runlog.log"]:
|
||||
report_url = paths["runlog.log"]
|
||||
if paths["main.log"]:
|
||||
report_url = paths["main.log"]
|
||||
if paths["server.log"]:
|
||||
report_url = paths["server.log"]
|
||||
if paths["fuzzer.log"]:
|
||||
report_url = paths["fuzzer.log"]
|
||||
if paths["report.html"]:
|
||||
report_url = paths["report.html"]
|
||||
|
||||
# Try to get status message saved by the fuzzer
|
||||
try:
|
||||
with open(os.path.join(workspace_path, 'status.txt'), 'r', encoding='utf-8') as status_f:
|
||||
status = status_f.readline().rstrip('\n')
|
||||
with open(
|
||||
os.path.join(workspace_path, "status.txt"), "r", encoding="utf-8"
|
||||
) as status_f:
|
||||
status = status_f.readline().rstrip("\n")
|
||||
|
||||
with open(os.path.join(workspace_path, 'description.txt'), 'r', encoding='utf-8') as desc_f:
|
||||
description = desc_f.readline().rstrip('\n')[:140]
|
||||
with open(
|
||||
os.path.join(workspace_path, "description.txt"), "r", encoding="utf-8"
|
||||
) as desc_f:
|
||||
description = desc_f.readline().rstrip("\n")[:140]
|
||||
except:
|
||||
status = 'failure'
|
||||
description = 'Task failed: $?=' + str(retcode)
|
||||
status = "failure"
|
||||
description = "Task failed: $?=" + str(retcode)
|
||||
|
||||
if 'fail' in status:
|
||||
test_result = [(description, 'FAIL')]
|
||||
if "fail" in status:
|
||||
test_result = [(description, "FAIL")]
|
||||
else:
|
||||
test_result = [(description, 'OK')]
|
||||
test_result = [(description, "OK")]
|
||||
|
||||
ch_helper = ClickHouseHelper()
|
||||
|
||||
prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_result, status, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name)
|
||||
prepared_events = prepare_tests_results_for_clickhouse(
|
||||
pr_info,
|
||||
test_result,
|
||||
status,
|
||||
stopwatch.duration_seconds,
|
||||
stopwatch.start_time_str,
|
||||
report_url,
|
||||
check_name,
|
||||
)
|
||||
|
||||
logging.info("Result: '%s', '%s', '%s'", status, description, report_url)
|
||||
print(f"::notice ::Report url: {report_url}")
|
||||
|
@ -6,20 +6,20 @@ import itertools
|
||||
import os
|
||||
import sys
|
||||
|
||||
NO_CHANGES_MSG = 'Nothing to run'
|
||||
NO_CHANGES_MSG = "Nothing to run"
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('report1')
|
||||
parser.add_argument('report2')
|
||||
parser.add_argument("report1")
|
||||
parser.add_argument("report2")
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def post_commit_status_from_file(file_path):
|
||||
res = []
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
fin = csv.reader(f, delimiter='\t')
|
||||
with open(file_path, "r", encoding="utf-8") as f:
|
||||
fin = csv.reader(f, delimiter="\t")
|
||||
res = list(itertools.islice(fin, 1))
|
||||
if len(res) < 1:
|
||||
raise Exception(f'Can\'t read from "{file_path}"')
|
||||
@ -31,8 +31,10 @@ def post_commit_status_from_file(file_path):
|
||||
def process_results(file_path):
|
||||
state, report_url, description = post_commit_status_from_file(file_path)
|
||||
prefix = os.path.basename(os.path.dirname(file_path))
|
||||
print(f'::notice:: bugfix check: {prefix} - {state}: {description} Report url: {report_url}')
|
||||
return state == 'success'
|
||||
print(
|
||||
f"::notice:: bugfix check: {prefix} - {state}: {description} Report url: {report_url}"
|
||||
)
|
||||
return state == "success"
|
||||
|
||||
|
||||
def main(args):
|
||||
@ -42,5 +44,5 @@ def main(args):
|
||||
sys.exit(0 if is_ok else 1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
main(parse_args())
|
||||
|
@ -6,7 +6,13 @@ import os
|
||||
import sys
|
||||
from github import Github
|
||||
|
||||
from env_helper import REPORTS_PATH, TEMP_PATH, GITHUB_REPOSITORY, GITHUB_SERVER_URL, GITHUB_RUN_ID
|
||||
from env_helper import (
|
||||
REPORTS_PATH,
|
||||
TEMP_PATH,
|
||||
GITHUB_REPOSITORY,
|
||||
GITHUB_SERVER_URL,
|
||||
GITHUB_RUN_ID,
|
||||
)
|
||||
from report import create_build_html_report
|
||||
from s3_helper import S3Helper
|
||||
from get_robot_token import get_best_robot_token
|
||||
@ -15,8 +21,19 @@ from commit_status_helper import get_commit
|
||||
from ci_config import CI_CONFIG
|
||||
from rerun_helper import RerunHelper
|
||||
|
||||
class BuildResult():
|
||||
def __init__(self, compiler, build_type, sanitizer, bundled, splitted, status, elapsed_seconds, with_coverage):
|
||||
|
||||
class BuildResult:
|
||||
def __init__(
|
||||
self,
|
||||
compiler,
|
||||
build_type,
|
||||
sanitizer,
|
||||
bundled,
|
||||
splitted,
|
||||
status,
|
||||
elapsed_seconds,
|
||||
with_coverage,
|
||||
):
|
||||
self.compiler = compiler
|
||||
self.build_type = build_type
|
||||
self.sanitizer = sanitizer
|
||||
@ -26,56 +43,72 @@ class BuildResult():
|
||||
self.elapsed_seconds = elapsed_seconds
|
||||
self.with_coverage = with_coverage
|
||||
|
||||
|
||||
def group_by_artifacts(build_urls):
|
||||
groups = {'apk': [],'deb': [], 'binary': [], 'tgz': [], 'rpm': [], 'performance': []}
|
||||
groups = {
|
||||
"apk": [],
|
||||
"deb": [],
|
||||
"binary": [],
|
||||
"tgz": [],
|
||||
"rpm": [],
|
||||
"performance": [],
|
||||
}
|
||||
for url in build_urls:
|
||||
if url.endswith('performance.tgz'):
|
||||
groups['performance'].append(url)
|
||||
elif url.endswith('.deb') or url.endswith('.buildinfo') or url.endswith('.changes') or url.endswith('.tar.gz'):
|
||||
groups['deb'].append(url)
|
||||
elif url.endswith('.apk'):
|
||||
groups['apk'].append(url)
|
||||
elif url.endswith('.rpm'):
|
||||
groups['rpm'].append(url)
|
||||
elif url.endswith('.tgz'):
|
||||
groups['tgz'].append(url)
|
||||
if url.endswith("performance.tgz"):
|
||||
groups["performance"].append(url)
|
||||
elif (
|
||||
url.endswith(".deb")
|
||||
or url.endswith(".buildinfo")
|
||||
or url.endswith(".changes")
|
||||
or url.endswith(".tar.gz")
|
||||
):
|
||||
groups["deb"].append(url)
|
||||
elif url.endswith(".apk"):
|
||||
groups["apk"].append(url)
|
||||
elif url.endswith(".rpm"):
|
||||
groups["rpm"].append(url)
|
||||
elif url.endswith(".tgz"):
|
||||
groups["tgz"].append(url)
|
||||
else:
|
||||
groups['binary'].append(url)
|
||||
groups["binary"].append(url)
|
||||
return groups
|
||||
|
||||
|
||||
def process_report(build_report):
|
||||
build_config = build_report['build_config']
|
||||
build_config = build_report["build_config"]
|
||||
build_result = BuildResult(
|
||||
compiler=build_config['compiler'],
|
||||
build_type=build_config['build_type'],
|
||||
sanitizer=build_config['sanitizer'],
|
||||
bundled=build_config['bundled'],
|
||||
splitted=build_config['splitted'],
|
||||
status="success" if build_report['status'] else "failure",
|
||||
elapsed_seconds=build_report['elapsed_seconds'],
|
||||
with_coverage=False
|
||||
compiler=build_config["compiler"],
|
||||
build_type=build_config["build_type"],
|
||||
sanitizer=build_config["sanitizer"],
|
||||
bundled=build_config["bundled"],
|
||||
splitted=build_config["splitted"],
|
||||
status="success" if build_report["status"] else "failure",
|
||||
elapsed_seconds=build_report["elapsed_seconds"],
|
||||
with_coverage=False,
|
||||
)
|
||||
build_results = []
|
||||
build_urls = []
|
||||
build_logs_urls = []
|
||||
urls_groups = group_by_artifacts(build_report['build_urls'])
|
||||
urls_groups = group_by_artifacts(build_report["build_urls"])
|
||||
found_group = False
|
||||
for _, group_urls in urls_groups.items():
|
||||
if group_urls:
|
||||
build_results.append(build_result)
|
||||
build_urls.append(group_urls)
|
||||
build_logs_urls.append(build_report['log_url'])
|
||||
build_logs_urls.append(build_report["log_url"])
|
||||
found_group = True
|
||||
|
||||
if not found_group:
|
||||
build_results.append(build_result)
|
||||
build_urls.append([""])
|
||||
build_logs_urls.append(build_report['log_url'])
|
||||
build_logs_urls.append(build_report["log_url"])
|
||||
|
||||
return build_results, build_urls, build_logs_urls
|
||||
|
||||
|
||||
def get_build_name_from_file_name(file_name):
|
||||
return file_name.replace('build_urls_', '').replace('.json', '')
|
||||
return file_name.replace("build_urls_", "").replace(".json", "")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
@ -101,17 +134,25 @@ if __name__ == "__main__":
|
||||
build_reports_map = {}
|
||||
for root, dirs, files in os.walk(reports_path):
|
||||
for f in files:
|
||||
if f.startswith("build_urls_") and f.endswith('.json'):
|
||||
if f.startswith("build_urls_") and f.endswith(".json"):
|
||||
logging.info("Found build report json %s", f)
|
||||
build_name = get_build_name_from_file_name(f)
|
||||
if build_name in reports_order:
|
||||
with open(os.path.join(root, f), 'r') as file_handler:
|
||||
with open(os.path.join(root, f), "r") as file_handler:
|
||||
build_report = json.load(file_handler)
|
||||
build_reports_map[build_name] = build_report
|
||||
else:
|
||||
logging.info("Skipping report %s for build %s, it's not in our reports list", f, build_name)
|
||||
logging.info(
|
||||
"Skipping report %s for build %s, it's not in our reports list",
|
||||
f,
|
||||
build_name,
|
||||
)
|
||||
|
||||
build_reports = [build_reports_map[build_name] for build_name in reports_order if build_name in build_reports_map]
|
||||
build_reports = [
|
||||
build_reports_map[build_name]
|
||||
for build_name in reports_order
|
||||
if build_name in build_reports_map
|
||||
]
|
||||
|
||||
build_results = []
|
||||
build_artifacts = []
|
||||
@ -129,7 +170,7 @@ if __name__ == "__main__":
|
||||
logging.info("No builds, failing check")
|
||||
sys.exit(1)
|
||||
|
||||
s3_helper = S3Helper('https://s3.amazonaws.com')
|
||||
s3_helper = S3Helper("https://s3.amazonaws.com")
|
||||
|
||||
pr_info = PRInfo()
|
||||
|
||||
@ -139,7 +180,9 @@ if __name__ == "__main__":
|
||||
branch_name = "PR #{}".format(pr_info.number)
|
||||
branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/pull/{pr_info.number}"
|
||||
commit_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commit/{pr_info.sha}"
|
||||
task_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID or '0'}"
|
||||
task_url = (
|
||||
f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID or '0'}"
|
||||
)
|
||||
report = create_build_html_report(
|
||||
build_check_name,
|
||||
build_results,
|
||||
@ -148,18 +191,22 @@ if __name__ == "__main__":
|
||||
task_url,
|
||||
branch_url,
|
||||
branch_name,
|
||||
commit_url
|
||||
commit_url,
|
||||
)
|
||||
|
||||
report_path = os.path.join(temp_path, 'report.html')
|
||||
with open(report_path, 'w') as f:
|
||||
report_path = os.path.join(temp_path, "report.html")
|
||||
with open(report_path, "w") as f:
|
||||
f.write(report)
|
||||
|
||||
logging.info("Going to upload prepared report")
|
||||
context_name_for_path = build_check_name.lower().replace(' ', '_')
|
||||
s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + context_name_for_path
|
||||
context_name_for_path = build_check_name.lower().replace(" ", "_")
|
||||
s3_path_prefix = (
|
||||
str(pr_info.number) + "/" + pr_info.sha + "/" + context_name_for_path
|
||||
)
|
||||
|
||||
url = s3_helper.upload_build_file_to_s3(report_path, s3_path_prefix + "/report.html")
|
||||
url = s3_helper.upload_build_file_to_s3(
|
||||
report_path, s3_path_prefix + "/report.html"
|
||||
)
|
||||
logging.info("Report url %s", url)
|
||||
|
||||
total_builds = len(build_results)
|
||||
@ -182,4 +229,9 @@ if __name__ == "__main__":
|
||||
print("::notice ::Report url: {}".format(url))
|
||||
|
||||
commit = get_commit(gh, pr_info.sha)
|
||||
commit.create_status(context=build_check_name, description=description, state=summary_status, target_url=url)
|
||||
commit.create_status(
|
||||
context=build_check_name,
|
||||
description=description,
|
||||
state=summary_status,
|
||||
target_url=url,
|
||||
)
|
||||
|
@ -13,16 +13,19 @@ from compress_files import decompress_fast, compress_fast
|
||||
|
||||
DOWNLOAD_RETRIES_COUNT = 5
|
||||
|
||||
|
||||
def dowload_file_with_progress(url, path):
|
||||
logging.info("Downloading from %s to temp path %s", url, path)
|
||||
for i in range(DOWNLOAD_RETRIES_COUNT):
|
||||
try:
|
||||
with open(path, 'wb') as f:
|
||||
with open(path, "wb") as f:
|
||||
response = requests.get(url, stream=True)
|
||||
response.raise_for_status()
|
||||
total_length = response.headers.get('content-length')
|
||||
total_length = response.headers.get("content-length")
|
||||
if total_length is None or int(total_length) == 0:
|
||||
logging.info("No content-length, will download file without progress")
|
||||
logging.info(
|
||||
"No content-length, will download file without progress"
|
||||
)
|
||||
f.write(response.content)
|
||||
else:
|
||||
dl = 0
|
||||
@ -34,8 +37,8 @@ def dowload_file_with_progress(url, path):
|
||||
if sys.stdout.isatty():
|
||||
done = int(50 * dl / total_length)
|
||||
percent = int(100 * float(dl) / total_length)
|
||||
eq_str = '=' * done
|
||||
space_str = ' ' * (50 - done)
|
||||
eq_str = "=" * done
|
||||
space_str = " " * (50 - done)
|
||||
sys.stdout.write(f"\r[{eq_str}{space_str}] {percent}%")
|
||||
sys.stdout.flush()
|
||||
break
|
||||
@ -52,7 +55,9 @@ def dowload_file_with_progress(url, path):
|
||||
logging.info("Downloading finished")
|
||||
|
||||
|
||||
def get_ccache_if_not_exists(path_to_ccache_dir, s3_helper, current_pr_number, temp_path):
|
||||
def get_ccache_if_not_exists(
|
||||
path_to_ccache_dir, s3_helper, current_pr_number, temp_path
|
||||
):
|
||||
ccache_name = os.path.basename(path_to_ccache_dir)
|
||||
cache_found = False
|
||||
prs_to_check = [current_pr_number]
|
||||
@ -93,13 +98,16 @@ def get_ccache_if_not_exists(path_to_ccache_dir, s3_helper, current_pr_number, t
|
||||
else:
|
||||
logging.info("ccache downloaded")
|
||||
|
||||
|
||||
def upload_ccache(path_to_ccache_dir, s3_helper, current_pr_number, temp_path):
|
||||
logging.info("Uploading cache %s for pr %s", path_to_ccache_dir, current_pr_number)
|
||||
ccache_name = os.path.basename(path_to_ccache_dir)
|
||||
compressed_cache_path = os.path.join(temp_path, ccache_name + ".tar.gz")
|
||||
compress_fast(path_to_ccache_dir, compressed_cache_path)
|
||||
|
||||
s3_path = str(current_pr_number) + "/ccaches/" + os.path.basename(compressed_cache_path)
|
||||
s3_path = (
|
||||
str(current_pr_number) + "/ccaches/" + os.path.basename(compressed_cache_path)
|
||||
)
|
||||
logging.info("Will upload %s to path %s", compressed_cache_path, s3_path)
|
||||
s3_helper.upload_build_file_to_s3(compressed_cache_path, s3_path)
|
||||
logging.info("Upload finished")
|
||||
|
@ -20,21 +20,29 @@ if __name__ == "__main__":
|
||||
if not os.path.exists(temp_path):
|
||||
os.makedirs(temp_path)
|
||||
|
||||
|
||||
sys.path.append(os.path.join(repo_path, "utils/github"))
|
||||
|
||||
|
||||
with SSHKey("ROBOT_CLICKHOUSE_SSH_KEY"):
|
||||
token = get_parameter_from_ssm("github_robot_token_1")
|
||||
|
||||
bp = Backport(token, os.environ.get("REPO_OWNER"), os.environ.get("REPO_NAME"), os.environ.get("REPO_TEAM"))
|
||||
bp = Backport(
|
||||
token,
|
||||
os.environ.get("REPO_OWNER"),
|
||||
os.environ.get("REPO_NAME"),
|
||||
os.environ.get("REPO_TEAM"),
|
||||
)
|
||||
|
||||
def cherrypick_run(token, pr, branch):
|
||||
return CherryPick(token,
|
||||
os.environ.get("REPO_OWNER"), os.environ.get("REPO_NAME"),
|
||||
os.environ.get("REPO_TEAM"), pr, branch
|
||||
).execute(repo_path, False)
|
||||
return CherryPick(
|
||||
token,
|
||||
os.environ.get("REPO_OWNER"),
|
||||
os.environ.get("REPO_NAME"),
|
||||
os.environ.get("REPO_TEAM"),
|
||||
pr,
|
||||
branch,
|
||||
).execute(repo_path, False)
|
||||
|
||||
try:
|
||||
bp.execute(repo_path, 'origin', None, cherrypick_run)
|
||||
bp.execute(repo_path, "origin", None, cherrypick_run)
|
||||
except subprocess.CalledProcessError as e:
|
||||
logging.error(e.output)
|
||||
|
@ -17,7 +17,9 @@ import sys
|
||||
|
||||
class Backport:
|
||||
def __init__(self, token, owner, name, team):
|
||||
self._gh = RemoteRepo(token, owner=owner, name=name, team=team, max_page_size=30, min_page_size=7)
|
||||
self._gh = RemoteRepo(
|
||||
token, owner=owner, name=name, team=team, max_page_size=30, min_page_size=7
|
||||
)
|
||||
self._token = token
|
||||
self.default_branch_name = self._gh.default_branch
|
||||
self.ssh_url = self._gh.ssh_url
|
||||
@ -28,7 +30,7 @@ class Backport:
|
||||
def getBranchesWithRelease(self):
|
||||
branches = set()
|
||||
for pull_request in self._gh.find_pull_requests("release"):
|
||||
branches.add(pull_request['headRefName'])
|
||||
branches.add(pull_request["headRefName"])
|
||||
return branches
|
||||
|
||||
def execute(self, repo, upstream, until_commit, run_cherrypick):
|
||||
@ -44,11 +46,11 @@ class Backport:
|
||||
branches.append(branch)
|
||||
|
||||
if not branches:
|
||||
logging.info('No release branches found!')
|
||||
logging.info("No release branches found!")
|
||||
return
|
||||
|
||||
for branch in branches:
|
||||
logging.info('Found release branch: %s', branch[0])
|
||||
logging.info("Found release branch: %s", branch[0])
|
||||
|
||||
if not until_commit:
|
||||
until_commit = branches[0][1]
|
||||
@ -56,73 +58,128 @@ class Backport:
|
||||
|
||||
backport_map = {}
|
||||
|
||||
RE_MUST_BACKPORT = re.compile(r'^v(\d+\.\d+)-must-backport$')
|
||||
RE_NO_BACKPORT = re.compile(r'^v(\d+\.\d+)-no-backport$')
|
||||
RE_BACKPORTED = re.compile(r'^v(\d+\.\d+)-backported$')
|
||||
RE_MUST_BACKPORT = re.compile(r"^v(\d+\.\d+)-must-backport$")
|
||||
RE_NO_BACKPORT = re.compile(r"^v(\d+\.\d+)-no-backport$")
|
||||
RE_BACKPORTED = re.compile(r"^v(\d+\.\d+)-backported$")
|
||||
|
||||
# pull-requests are sorted by ancestry from the most recent.
|
||||
for pr in pull_requests:
|
||||
while repo.comparator(branches[-1][1]) >= repo.comparator(pr['mergeCommit']['oid']):
|
||||
logging.info("PR #{} is already inside {}. Dropping this branch for further PRs".format(pr['number'], branches[-1][0]))
|
||||
while repo.comparator(branches[-1][1]) >= repo.comparator(
|
||||
pr["mergeCommit"]["oid"]
|
||||
):
|
||||
logging.info(
|
||||
"PR #{} is already inside {}. Dropping this branch for further PRs".format(
|
||||
pr["number"], branches[-1][0]
|
||||
)
|
||||
)
|
||||
branches.pop()
|
||||
|
||||
logging.info("Processing PR #{}".format(pr['number']))
|
||||
logging.info("Processing PR #{}".format(pr["number"]))
|
||||
|
||||
assert len(branches)
|
||||
|
||||
branch_set = set([branch[0] for branch in branches])
|
||||
|
||||
# First pass. Find all must-backports
|
||||
for label in pr['labels']['nodes']:
|
||||
if label['name'] == 'pr-must-backport':
|
||||
backport_map[pr['number']] = branch_set.copy()
|
||||
for label in pr["labels"]["nodes"]:
|
||||
if label["name"] == "pr-must-backport":
|
||||
backport_map[pr["number"]] = branch_set.copy()
|
||||
continue
|
||||
matched = RE_MUST_BACKPORT.match(label['name'])
|
||||
matched = RE_MUST_BACKPORT.match(label["name"])
|
||||
if matched:
|
||||
if pr['number'] not in backport_map:
|
||||
backport_map[pr['number']] = set()
|
||||
backport_map[pr['number']].add(matched.group(1))
|
||||
if pr["number"] not in backport_map:
|
||||
backport_map[pr["number"]] = set()
|
||||
backport_map[pr["number"]].add(matched.group(1))
|
||||
|
||||
# Second pass. Find all no-backports
|
||||
for label in pr['labels']['nodes']:
|
||||
if label['name'] == 'pr-no-backport' and pr['number'] in backport_map:
|
||||
del backport_map[pr['number']]
|
||||
for label in pr["labels"]["nodes"]:
|
||||
if label["name"] == "pr-no-backport" and pr["number"] in backport_map:
|
||||
del backport_map[pr["number"]]
|
||||
break
|
||||
matched_no_backport = RE_NO_BACKPORT.match(label['name'])
|
||||
matched_backported = RE_BACKPORTED.match(label['name'])
|
||||
if matched_no_backport and pr['number'] in backport_map and matched_no_backport.group(1) in backport_map[pr['number']]:
|
||||
backport_map[pr['number']].remove(matched_no_backport.group(1))
|
||||
logging.info('\tskipping %s because of forced no-backport', matched_no_backport.group(1))
|
||||
elif matched_backported and pr['number'] in backport_map and matched_backported.group(1) in backport_map[pr['number']]:
|
||||
backport_map[pr['number']].remove(matched_backported.group(1))
|
||||
logging.info('\tskipping %s because it\'s already backported manually', matched_backported.group(1))
|
||||
matched_no_backport = RE_NO_BACKPORT.match(label["name"])
|
||||
matched_backported = RE_BACKPORTED.match(label["name"])
|
||||
if (
|
||||
matched_no_backport
|
||||
and pr["number"] in backport_map
|
||||
and matched_no_backport.group(1) in backport_map[pr["number"]]
|
||||
):
|
||||
backport_map[pr["number"]].remove(matched_no_backport.group(1))
|
||||
logging.info(
|
||||
"\tskipping %s because of forced no-backport",
|
||||
matched_no_backport.group(1),
|
||||
)
|
||||
elif (
|
||||
matched_backported
|
||||
and pr["number"] in backport_map
|
||||
and matched_backported.group(1) in backport_map[pr["number"]]
|
||||
):
|
||||
backport_map[pr["number"]].remove(matched_backported.group(1))
|
||||
logging.info(
|
||||
"\tskipping %s because it's already backported manually",
|
||||
matched_backported.group(1),
|
||||
)
|
||||
|
||||
for pr, branches in list(backport_map.items()):
|
||||
logging.info('PR #%s needs to be backported to:', pr)
|
||||
logging.info("PR #%s needs to be backported to:", pr)
|
||||
for branch in branches:
|
||||
logging.info('\t%s, and the status is: %s', branch, run_cherrypick(self._token, pr, branch))
|
||||
logging.info(
|
||||
"\t%s, and the status is: %s",
|
||||
branch,
|
||||
run_cherrypick(self._token, pr, branch),
|
||||
)
|
||||
|
||||
# print API costs
|
||||
logging.info('\nGitHub API total costs per query:')
|
||||
logging.info("\nGitHub API total costs per query:")
|
||||
for name, value in list(self._gh.api_costs.items()):
|
||||
logging.info('%s : %s', name, value)
|
||||
logging.info("%s : %s", name, value)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--token', type=str, required=True, help='token for Github access')
|
||||
parser.add_argument('--repo', type=str, required=True, help='path to full repository', metavar='PATH')
|
||||
parser.add_argument('--til', type=str, help='check PRs from HEAD til this commit', metavar='COMMIT')
|
||||
parser.add_argument('--dry-run', action='store_true', help='do not create or merge any PRs', default=False)
|
||||
parser.add_argument('--verbose', '-v', action='store_true', help='more verbose output', default=False)
|
||||
parser.add_argument('--upstream', '-u', type=str, help='remote name of upstream in repository', default='origin')
|
||||
parser.add_argument(
|
||||
"--token", type=str, required=True, help="token for Github access"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--repo",
|
||||
type=str,
|
||||
required=True,
|
||||
help="path to full repository",
|
||||
metavar="PATH",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--til", type=str, help="check PRs from HEAD til this commit", metavar="COMMIT"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--dry-run",
|
||||
action="store_true",
|
||||
help="do not create or merge any PRs",
|
||||
default=False,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--verbose",
|
||||
"-v",
|
||||
action="store_true",
|
||||
help="more verbose output",
|
||||
default=False,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--upstream",
|
||||
"-u",
|
||||
type=str,
|
||||
help="remote name of upstream in repository",
|
||||
default="origin",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.verbose:
|
||||
logging.basicConfig(format='%(message)s', stream=sys.stdout, level=logging.DEBUG)
|
||||
logging.basicConfig(
|
||||
format="%(message)s", stream=sys.stdout, level=logging.DEBUG
|
||||
)
|
||||
else:
|
||||
logging.basicConfig(format='%(message)s', stream=sys.stdout, level=logging.INFO)
|
||||
logging.basicConfig(format="%(message)s", stream=sys.stdout, level=logging.INFO)
|
||||
|
||||
cherrypick_run = lambda token, pr, branch: CherryPick(token, 'ClickHouse', 'ClickHouse', 'core', pr, branch).execute(args.repo, args.dry_run)
|
||||
bp = Backport(args.token, 'ClickHouse', 'ClickHouse', 'core')
|
||||
cherrypick_run = lambda token, pr, branch: CherryPick(
|
||||
token, "ClickHouse", "ClickHouse", "core", pr, branch
|
||||
).execute(args.repo, args.dry_run)
|
||||
bp = Backport(args.token, "ClickHouse", "ClickHouse", "core")
|
||||
bp.execute(args.repo, args.upstream, args.til, cherrypick_run)
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
'''
|
||||
"""
|
||||
Backports changes from PR to release branch.
|
||||
Requires multiple separate runs as part of the implementation.
|
||||
|
||||
@ -12,7 +12,7 @@ First run should do the following:
|
||||
Second run checks PR from previous run to be merged or at least being mergeable. If it's not merged then try to merge it.
|
||||
|
||||
Third run creates PR from backport branch (with merged previous PR) to release branch.
|
||||
'''
|
||||
"""
|
||||
|
||||
try:
|
||||
from clickhouse.utils.github.query import Query as RemoteRepo
|
||||
@ -29,13 +29,13 @@ import sys
|
||||
|
||||
class CherryPick:
|
||||
class Status(Enum):
|
||||
DISCARDED = 'discarded'
|
||||
NOT_INITIATED = 'not started'
|
||||
FIRST_MERGEABLE = 'waiting for 1st stage'
|
||||
FIRST_CONFLICTS = 'conflicts on 1st stage'
|
||||
SECOND_MERGEABLE = 'waiting for 2nd stage'
|
||||
SECOND_CONFLICTS = 'conflicts on 2nd stage'
|
||||
MERGED = 'backported'
|
||||
DISCARDED = "discarded"
|
||||
NOT_INITIATED = "not started"
|
||||
FIRST_MERGEABLE = "waiting for 1st stage"
|
||||
FIRST_CONFLICTS = "conflicts on 1st stage"
|
||||
SECOND_MERGEABLE = "waiting for 2nd stage"
|
||||
SECOND_CONFLICTS = "conflicts on 2nd stage"
|
||||
MERGED = "backported"
|
||||
|
||||
def _run(self, args):
|
||||
out = subprocess.check_output(args).rstrip()
|
||||
@ -50,51 +50,90 @@ class CherryPick:
|
||||
|
||||
# TODO: check if pull-request is merged.
|
||||
|
||||
self.merge_commit_oid = self._pr['mergeCommit']['oid']
|
||||
self.merge_commit_oid = self._pr["mergeCommit"]["oid"]
|
||||
|
||||
self.target_branch = target_branch
|
||||
self.backport_branch = 'backport/{branch}/{pr}'.format(branch=target_branch, pr=pr_number)
|
||||
self.cherrypick_branch = 'cherrypick/{branch}/{oid}'.format(branch=target_branch, oid=self.merge_commit_oid)
|
||||
self.backport_branch = "backport/{branch}/{pr}".format(
|
||||
branch=target_branch, pr=pr_number
|
||||
)
|
||||
self.cherrypick_branch = "cherrypick/{branch}/{oid}".format(
|
||||
branch=target_branch, oid=self.merge_commit_oid
|
||||
)
|
||||
|
||||
def getCherryPickPullRequest(self):
|
||||
return self._gh.find_pull_request(base=self.backport_branch, head=self.cherrypick_branch)
|
||||
return self._gh.find_pull_request(
|
||||
base=self.backport_branch, head=self.cherrypick_branch
|
||||
)
|
||||
|
||||
def createCherryPickPullRequest(self, repo_path):
|
||||
DESCRIPTION = (
|
||||
'This pull-request is a first step of an automated backporting.\n'
|
||||
'It contains changes like after calling a local command `git cherry-pick`.\n'
|
||||
'If you intend to continue backporting this changes, then resolve all conflicts if any.\n'
|
||||
'Otherwise, if you do not want to backport them, then just close this pull-request.\n'
|
||||
'\n'
|
||||
'The check results does not matter at this step - you can safely ignore them.\n'
|
||||
'Also this pull-request will be merged automatically as it reaches the mergeable state, but you always can merge it manually.\n'
|
||||
"This pull-request is a first step of an automated backporting.\n"
|
||||
"It contains changes like after calling a local command `git cherry-pick`.\n"
|
||||
"If you intend to continue backporting this changes, then resolve all conflicts if any.\n"
|
||||
"Otherwise, if you do not want to backport them, then just close this pull-request.\n"
|
||||
"\n"
|
||||
"The check results does not matter at this step - you can safely ignore them.\n"
|
||||
"Also this pull-request will be merged automatically as it reaches the mergeable state, but you always can merge it manually.\n"
|
||||
)
|
||||
|
||||
# FIXME: replace with something better than os.system()
|
||||
git_prefix = ['git', '-C', repo_path, '-c', 'user.email=robot-clickhouse@yandex-team.ru', '-c', 'user.name=robot-clickhouse']
|
||||
base_commit_oid = self._pr['mergeCommit']['parents']['nodes'][0]['oid']
|
||||
git_prefix = [
|
||||
"git",
|
||||
"-C",
|
||||
repo_path,
|
||||
"-c",
|
||||
"user.email=robot-clickhouse@yandex-team.ru",
|
||||
"-c",
|
||||
"user.name=robot-clickhouse",
|
||||
]
|
||||
base_commit_oid = self._pr["mergeCommit"]["parents"]["nodes"][0]["oid"]
|
||||
|
||||
# Create separate branch for backporting, and make it look like real cherry-pick.
|
||||
self._run(git_prefix + ['checkout', '-f', self.target_branch])
|
||||
self._run(git_prefix + ['checkout', '-B', self.backport_branch])
|
||||
self._run(git_prefix + ['merge', '-s', 'ours', '--no-edit', base_commit_oid])
|
||||
self._run(git_prefix + ["checkout", "-f", self.target_branch])
|
||||
self._run(git_prefix + ["checkout", "-B", self.backport_branch])
|
||||
self._run(git_prefix + ["merge", "-s", "ours", "--no-edit", base_commit_oid])
|
||||
|
||||
# Create secondary branch to allow pull request with cherry-picked commit.
|
||||
self._run(git_prefix + ['branch', '-f', self.cherrypick_branch, self.merge_commit_oid])
|
||||
self._run(
|
||||
git_prefix + ["branch", "-f", self.cherrypick_branch, self.merge_commit_oid]
|
||||
)
|
||||
|
||||
self._run(git_prefix + ['push', '-f', 'origin', '{branch}:{branch}'.format(branch=self.backport_branch)])
|
||||
self._run(git_prefix + ['push', '-f', 'origin', '{branch}:{branch}'.format(branch=self.cherrypick_branch)])
|
||||
self._run(
|
||||
git_prefix
|
||||
+ [
|
||||
"push",
|
||||
"-f",
|
||||
"origin",
|
||||
"{branch}:{branch}".format(branch=self.backport_branch),
|
||||
]
|
||||
)
|
||||
self._run(
|
||||
git_prefix
|
||||
+ [
|
||||
"push",
|
||||
"-f",
|
||||
"origin",
|
||||
"{branch}:{branch}".format(branch=self.cherrypick_branch),
|
||||
]
|
||||
)
|
||||
|
||||
# Create pull-request like a local cherry-pick
|
||||
pr = self._gh.create_pull_request(source=self.cherrypick_branch, target=self.backport_branch,
|
||||
title='Cherry pick #{number} to {target}: {title}'.format(
|
||||
number=self._pr['number'], target=self.target_branch,
|
||||
title=self._pr['title'].replace('"', '\\"')),
|
||||
description='Original pull-request #{}\n\n{}'.format(self._pr['number'], DESCRIPTION))
|
||||
pr = self._gh.create_pull_request(
|
||||
source=self.cherrypick_branch,
|
||||
target=self.backport_branch,
|
||||
title="Cherry pick #{number} to {target}: {title}".format(
|
||||
number=self._pr["number"],
|
||||
target=self.target_branch,
|
||||
title=self._pr["title"].replace('"', '\\"'),
|
||||
),
|
||||
description="Original pull-request #{}\n\n{}".format(
|
||||
self._pr["number"], DESCRIPTION
|
||||
),
|
||||
)
|
||||
|
||||
# FIXME: use `team` to leave a single eligible assignee.
|
||||
self._gh.add_assignee(pr, self._pr['author'])
|
||||
self._gh.add_assignee(pr, self._pr['mergedBy'])
|
||||
self._gh.add_assignee(pr, self._pr["author"])
|
||||
self._gh.add_assignee(pr, self._pr["mergedBy"])
|
||||
|
||||
self._gh.set_label(pr, "do not test")
|
||||
self._gh.set_label(pr, "pr-cherrypick")
|
||||
@ -102,36 +141,76 @@ class CherryPick:
|
||||
return pr
|
||||
|
||||
def mergeCherryPickPullRequest(self, cherrypick_pr):
|
||||
return self._gh.merge_pull_request(cherrypick_pr['id'])
|
||||
return self._gh.merge_pull_request(cherrypick_pr["id"])
|
||||
|
||||
def getBackportPullRequest(self):
|
||||
return self._gh.find_pull_request(base=self.target_branch, head=self.backport_branch)
|
||||
return self._gh.find_pull_request(
|
||||
base=self.target_branch, head=self.backport_branch
|
||||
)
|
||||
|
||||
def createBackportPullRequest(self, cherrypick_pr, repo_path):
|
||||
DESCRIPTION = (
|
||||
'This pull-request is a last step of an automated backporting.\n'
|
||||
'Treat it as a standard pull-request: look at the checks and resolve conflicts.\n'
|
||||
'Merge it only if you intend to backport changes to the target branch, otherwise just close it.\n'
|
||||
"This pull-request is a last step of an automated backporting.\n"
|
||||
"Treat it as a standard pull-request: look at the checks and resolve conflicts.\n"
|
||||
"Merge it only if you intend to backport changes to the target branch, otherwise just close it.\n"
|
||||
)
|
||||
|
||||
git_prefix = ['git', '-C', repo_path, '-c', 'user.email=robot-clickhouse@clickhouse.com', '-c', 'user.name=robot-clickhouse']
|
||||
git_prefix = [
|
||||
"git",
|
||||
"-C",
|
||||
repo_path,
|
||||
"-c",
|
||||
"user.email=robot-clickhouse@clickhouse.com",
|
||||
"-c",
|
||||
"user.name=robot-clickhouse",
|
||||
]
|
||||
|
||||
pr_title = 'Backport #{number} to {target}: {title}'.format(
|
||||
number=self._pr['number'], target=self.target_branch,
|
||||
title=self._pr['title'].replace('"', '\\"'))
|
||||
pr_title = "Backport #{number} to {target}: {title}".format(
|
||||
number=self._pr["number"],
|
||||
target=self.target_branch,
|
||||
title=self._pr["title"].replace('"', '\\"'),
|
||||
)
|
||||
|
||||
self._run(git_prefix + ['checkout', '-f', self.backport_branch])
|
||||
self._run(git_prefix + ['pull', '--ff-only', 'origin', self.backport_branch])
|
||||
self._run(git_prefix + ['reset', '--soft', self._run(git_prefix + ['merge-base', 'origin/' + self.target_branch, self.backport_branch])])
|
||||
self._run(git_prefix + ['commit', '-a', '--allow-empty', '-m', pr_title])
|
||||
self._run(git_prefix + ['push', '-f', 'origin', '{branch}:{branch}'.format(branch=self.backport_branch)])
|
||||
self._run(git_prefix + ["checkout", "-f", self.backport_branch])
|
||||
self._run(git_prefix + ["pull", "--ff-only", "origin", self.backport_branch])
|
||||
self._run(
|
||||
git_prefix
|
||||
+ [
|
||||
"reset",
|
||||
"--soft",
|
||||
self._run(
|
||||
git_prefix
|
||||
+ [
|
||||
"merge-base",
|
||||
"origin/" + self.target_branch,
|
||||
self.backport_branch,
|
||||
]
|
||||
),
|
||||
]
|
||||
)
|
||||
self._run(git_prefix + ["commit", "-a", "--allow-empty", "-m", pr_title])
|
||||
self._run(
|
||||
git_prefix
|
||||
+ [
|
||||
"push",
|
||||
"-f",
|
||||
"origin",
|
||||
"{branch}:{branch}".format(branch=self.backport_branch),
|
||||
]
|
||||
)
|
||||
|
||||
pr = self._gh.create_pull_request(source=self.backport_branch, target=self.target_branch, title=pr_title,
|
||||
description='Original pull-request #{}\nCherry-pick pull-request #{}\n\n{}'.format(self._pr['number'], cherrypick_pr['number'], DESCRIPTION))
|
||||
pr = self._gh.create_pull_request(
|
||||
source=self.backport_branch,
|
||||
target=self.target_branch,
|
||||
title=pr_title,
|
||||
description="Original pull-request #{}\nCherry-pick pull-request #{}\n\n{}".format(
|
||||
self._pr["number"], cherrypick_pr["number"], DESCRIPTION
|
||||
),
|
||||
)
|
||||
|
||||
# FIXME: use `team` to leave a single eligible assignee.
|
||||
self._gh.add_assignee(pr, self._pr['author'])
|
||||
self._gh.add_assignee(pr, self._pr['mergedBy'])
|
||||
self._gh.add_assignee(pr, self._pr["author"])
|
||||
self._gh.add_assignee(pr, self._pr["mergedBy"])
|
||||
|
||||
self._gh.set_label(pr, "pr-backport")
|
||||
|
||||
@ -142,23 +221,43 @@ class CherryPick:
|
||||
if not pr1:
|
||||
if not dry_run:
|
||||
pr1 = self.createCherryPickPullRequest(repo_path)
|
||||
logging.debug('Created PR with cherry-pick of %s to %s: %s', self._pr['number'], self.target_branch, pr1['url'])
|
||||
logging.debug(
|
||||
"Created PR with cherry-pick of %s to %s: %s",
|
||||
self._pr["number"],
|
||||
self.target_branch,
|
||||
pr1["url"],
|
||||
)
|
||||
else:
|
||||
return CherryPick.Status.NOT_INITIATED
|
||||
else:
|
||||
logging.debug('Found PR with cherry-pick of %s to %s: %s', self._pr['number'], self.target_branch, pr1['url'])
|
||||
logging.debug(
|
||||
"Found PR with cherry-pick of %s to %s: %s",
|
||||
self._pr["number"],
|
||||
self.target_branch,
|
||||
pr1["url"],
|
||||
)
|
||||
|
||||
if not pr1['merged'] and pr1['mergeable'] == 'MERGEABLE' and not pr1['closed']:
|
||||
if not pr1["merged"] and pr1["mergeable"] == "MERGEABLE" and not pr1["closed"]:
|
||||
if not dry_run:
|
||||
pr1 = self.mergeCherryPickPullRequest(pr1)
|
||||
logging.debug('Merged PR with cherry-pick of %s to %s: %s', self._pr['number'], self.target_branch, pr1['url'])
|
||||
logging.debug(
|
||||
"Merged PR with cherry-pick of %s to %s: %s",
|
||||
self._pr["number"],
|
||||
self.target_branch,
|
||||
pr1["url"],
|
||||
)
|
||||
|
||||
if not pr1['merged']:
|
||||
logging.debug('Waiting for PR with cherry-pick of %s to %s: %s', self._pr['number'], self.target_branch, pr1['url'])
|
||||
if not pr1["merged"]:
|
||||
logging.debug(
|
||||
"Waiting for PR with cherry-pick of %s to %s: %s",
|
||||
self._pr["number"],
|
||||
self.target_branch,
|
||||
pr1["url"],
|
||||
)
|
||||
|
||||
if pr1['closed']:
|
||||
if pr1["closed"]:
|
||||
return CherryPick.Status.DISCARDED
|
||||
elif pr1['mergeable'] == 'CONFLICTING':
|
||||
elif pr1["mergeable"] == "CONFLICTING":
|
||||
return CherryPick.Status.FIRST_CONFLICTS
|
||||
else:
|
||||
return CherryPick.Status.FIRST_MERGEABLE
|
||||
@ -167,31 +266,58 @@ class CherryPick:
|
||||
if not pr2:
|
||||
if not dry_run:
|
||||
pr2 = self.createBackportPullRequest(pr1, repo_path)
|
||||
logging.debug('Created PR with backport of %s to %s: %s', self._pr['number'], self.target_branch, pr2['url'])
|
||||
logging.debug(
|
||||
"Created PR with backport of %s to %s: %s",
|
||||
self._pr["number"],
|
||||
self.target_branch,
|
||||
pr2["url"],
|
||||
)
|
||||
else:
|
||||
return CherryPick.Status.FIRST_MERGEABLE
|
||||
else:
|
||||
logging.debug('Found PR with backport of %s to %s: %s', self._pr['number'], self.target_branch, pr2['url'])
|
||||
logging.debug(
|
||||
"Found PR with backport of %s to %s: %s",
|
||||
self._pr["number"],
|
||||
self.target_branch,
|
||||
pr2["url"],
|
||||
)
|
||||
|
||||
if pr2['merged']:
|
||||
if pr2["merged"]:
|
||||
return CherryPick.Status.MERGED
|
||||
elif pr2['closed']:
|
||||
elif pr2["closed"]:
|
||||
return CherryPick.Status.DISCARDED
|
||||
elif pr2['mergeable'] == 'CONFLICTING':
|
||||
elif pr2["mergeable"] == "CONFLICTING":
|
||||
return CherryPick.Status.SECOND_CONFLICTS
|
||||
else:
|
||||
return CherryPick.Status.SECOND_MERGEABLE
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(format='%(message)s', stream=sys.stdout, level=logging.DEBUG)
|
||||
logging.basicConfig(format="%(message)s", stream=sys.stdout, level=logging.DEBUG)
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--token', '-t', type=str, required=True, help='token for Github access')
|
||||
parser.add_argument('--pr', type=str, required=True, help='PR# to cherry-pick')
|
||||
parser.add_argument('--branch', '-b', type=str, required=True, help='target branch name for cherry-pick')
|
||||
parser.add_argument('--repo', '-r', type=str, required=True, help='path to full repository', metavar='PATH')
|
||||
parser.add_argument(
|
||||
"--token", "-t", type=str, required=True, help="token for Github access"
|
||||
)
|
||||
parser.add_argument("--pr", type=str, required=True, help="PR# to cherry-pick")
|
||||
parser.add_argument(
|
||||
"--branch",
|
||||
"-b",
|
||||
type=str,
|
||||
required=True,
|
||||
help="target branch name for cherry-pick",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--repo",
|
||||
"-r",
|
||||
type=str,
|
||||
required=True,
|
||||
help="path to full repository",
|
||||
metavar="PATH",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
cp = CherryPick(args.token, 'ClickHouse', 'ClickHouse', 'core', args.pr, args.branch)
|
||||
cp = CherryPick(
|
||||
args.token, "ClickHouse", "ClickHouse", "core", args.pr, args.branch
|
||||
)
|
||||
cp.execute(args.repo)
|
||||
|
@ -20,13 +20,14 @@ class RepositoryBase:
|
||||
return -1
|
||||
else:
|
||||
return 1
|
||||
|
||||
self.comparator = functools.cmp_to_key(cmp)
|
||||
|
||||
def get_head_commit(self):
|
||||
return self._repo.commit(self._default)
|
||||
|
||||
def iterate(self, begin, end):
|
||||
rev_range = '{}...{}'.format(begin, end)
|
||||
rev_range = "{}...{}".format(begin, end)
|
||||
for commit in self._repo.iter_commits(rev_range, first_parent=True):
|
||||
yield commit
|
||||
|
||||
@ -39,27 +40,35 @@ class Repository(RepositoryBase):
|
||||
self._default = self._remote.refs[default_branch_name]
|
||||
|
||||
def get_release_branches(self):
|
||||
'''
|
||||
"""
|
||||
Returns sorted list of tuples:
|
||||
* remote branch (git.refs.remote.RemoteReference),
|
||||
* base commit (git.Commit),
|
||||
* head (git.Commit)).
|
||||
List is sorted by commits in ascending order.
|
||||
'''
|
||||
"""
|
||||
release_branches = []
|
||||
|
||||
RE_RELEASE_BRANCH_REF = re.compile(r'^refs/remotes/.+/\d+\.\d+$')
|
||||
RE_RELEASE_BRANCH_REF = re.compile(r"^refs/remotes/.+/\d+\.\d+$")
|
||||
|
||||
for branch in [r for r in self._remote.refs if RE_RELEASE_BRANCH_REF.match(r.path)]:
|
||||
for branch in [
|
||||
r for r in self._remote.refs if RE_RELEASE_BRANCH_REF.match(r.path)
|
||||
]:
|
||||
base = self._repo.merge_base(self._default, self._repo.commit(branch))
|
||||
if not base:
|
||||
logging.info('Branch %s is not based on branch %s. Ignoring.', branch.path, self._default)
|
||||
logging.info(
|
||||
"Branch %s is not based on branch %s. Ignoring.",
|
||||
branch.path,
|
||||
self._default,
|
||||
)
|
||||
elif len(base) > 1:
|
||||
logging.info('Branch %s has more than one base commit. Ignoring.', branch.path)
|
||||
logging.info(
|
||||
"Branch %s has more than one base commit. Ignoring.", branch.path
|
||||
)
|
||||
else:
|
||||
release_branches.append((os.path.basename(branch.name), base[0]))
|
||||
|
||||
return sorted(release_branches, key=lambda x : self.comparator(x[1]))
|
||||
return sorted(release_branches, key=lambda x: self.comparator(x[1]))
|
||||
|
||||
|
||||
class BareRepository(RepositoryBase):
|
||||
@ -68,24 +77,32 @@ class BareRepository(RepositoryBase):
|
||||
self._default = self._repo.branches[default_branch_name]
|
||||
|
||||
def get_release_branches(self):
|
||||
'''
|
||||
"""
|
||||
Returns sorted list of tuples:
|
||||
* branch (git.refs.head?),
|
||||
* base commit (git.Commit),
|
||||
* head (git.Commit)).
|
||||
List is sorted by commits in ascending order.
|
||||
'''
|
||||
"""
|
||||
release_branches = []
|
||||
|
||||
RE_RELEASE_BRANCH_REF = re.compile(r'^refs/heads/\d+\.\d+$')
|
||||
RE_RELEASE_BRANCH_REF = re.compile(r"^refs/heads/\d+\.\d+$")
|
||||
|
||||
for branch in [r for r in self._repo.branches if RE_RELEASE_BRANCH_REF.match(r.path)]:
|
||||
for branch in [
|
||||
r for r in self._repo.branches if RE_RELEASE_BRANCH_REF.match(r.path)
|
||||
]:
|
||||
base = self._repo.merge_base(self._default, self._repo.commit(branch))
|
||||
if not base:
|
||||
logging.info('Branch %s is not based on branch %s. Ignoring.', branch.path, self._default)
|
||||
logging.info(
|
||||
"Branch %s is not based on branch %s. Ignoring.",
|
||||
branch.path,
|
||||
self._default,
|
||||
)
|
||||
elif len(base) > 1:
|
||||
logging.info('Branch %s has more than one base commit. Ignoring.', branch.path)
|
||||
logging.info(
|
||||
"Branch %s has more than one base commit. Ignoring.", branch.path
|
||||
)
|
||||
else:
|
||||
release_branches.append((os.path.basename(branch.name), base[0]))
|
||||
|
||||
return sorted(release_branches, key=lambda x : self.comparator(x[1]))
|
||||
return sorted(release_branches, key=lambda x: self.comparator(x[1]))
|
||||
|
@ -1,19 +1,20 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
|
||||
class Description:
|
||||
'''Parsed description representation
|
||||
'''
|
||||
"""Parsed description representation"""
|
||||
|
||||
MAP_CATEGORY_TO_LABEL = {
|
||||
'New Feature': 'pr-feature',
|
||||
'Bug Fix': 'pr-bugfix',
|
||||
'Improvement': 'pr-improvement',
|
||||
'Performance Improvement': 'pr-performance',
|
||||
"New Feature": "pr-feature",
|
||||
"Bug Fix": "pr-bugfix",
|
||||
"Improvement": "pr-improvement",
|
||||
"Performance Improvement": "pr-performance",
|
||||
# 'Backward Incompatible Change': doesn't match anything
|
||||
'Build/Testing/Packaging Improvement': 'pr-build',
|
||||
'Non-significant (changelog entry is not needed)': 'pr-non-significant',
|
||||
'Non-significant (changelog entry is not required)': 'pr-non-significant',
|
||||
'Non-significant': 'pr-non-significant',
|
||||
'Documentation (changelog entry is not required)': 'pr-documentation',
|
||||
"Build/Testing/Packaging Improvement": "pr-build",
|
||||
"Non-significant (changelog entry is not needed)": "pr-non-significant",
|
||||
"Non-significant (changelog entry is not required)": "pr-non-significant",
|
||||
"Non-significant": "pr-non-significant",
|
||||
"Documentation (changelog entry is not required)": "pr-documentation",
|
||||
# 'Other': doesn't match anything
|
||||
}
|
||||
|
||||
@ -21,7 +22,7 @@ class Description:
|
||||
self.label_name = str()
|
||||
self.legal = False
|
||||
|
||||
self._parse(pull_request['bodyText'])
|
||||
self._parse(pull_request["bodyText"])
|
||||
|
||||
def _parse(self, text):
|
||||
lines = text.splitlines()
|
||||
@ -38,14 +39,17 @@ class Description:
|
||||
category = stripped
|
||||
next_category = False
|
||||
|
||||
if stripped == 'I hereby agree to the terms of the CLA available at: https://yandex.ru/legal/cla/?lang=en':
|
||||
if (
|
||||
stripped
|
||||
== "I hereby agree to the terms of the CLA available at: https://yandex.ru/legal/cla/?lang=en"
|
||||
):
|
||||
self.legal = True
|
||||
|
||||
category_headers = (
|
||||
'Category (leave one):',
|
||||
'Changelog category (leave one):',
|
||||
'Changelog category:',
|
||||
'Category:'
|
||||
"Category (leave one):",
|
||||
"Changelog category (leave one):",
|
||||
"Changelog category:",
|
||||
"Category:",
|
||||
)
|
||||
|
||||
if stripped in category_headers:
|
||||
@ -55,6 +59,6 @@ class Description:
|
||||
self.label_name = Description.MAP_CATEGORY_TO_LABEL[category]
|
||||
else:
|
||||
if not category:
|
||||
print('Cannot find category in pr description')
|
||||
print("Cannot find category in pr description")
|
||||
else:
|
||||
print(('Unknown category: ' + category))
|
||||
print(("Unknown category: " + category))
|
||||
|
@ -5,11 +5,11 @@ import time
|
||||
|
||||
|
||||
class Query:
|
||||
'''
|
||||
"""
|
||||
Implements queries to the Github API using GraphQL
|
||||
'''
|
||||
"""
|
||||
|
||||
_PULL_REQUEST = '''
|
||||
_PULL_REQUEST = """
|
||||
author {{
|
||||
... on User {{
|
||||
id
|
||||
@ -47,7 +47,7 @@ class Query:
|
||||
number
|
||||
title
|
||||
url
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, token, owner, name, team, max_page_size=100, min_page_size=10):
|
||||
self._PULL_REQUEST = Query._PULL_REQUEST.format(min_page_size=min_page_size)
|
||||
@ -63,14 +63,14 @@ class Query:
|
||||
self.api_costs = {}
|
||||
|
||||
repo = self.get_repository()
|
||||
self._id = repo['id']
|
||||
self.ssh_url = repo['sshUrl']
|
||||
self.default_branch = repo['defaultBranchRef']['name']
|
||||
self._id = repo["id"]
|
||||
self.ssh_url = repo["sshUrl"]
|
||||
self.default_branch = repo["defaultBranchRef"]["name"]
|
||||
|
||||
self.members = set(self.get_members())
|
||||
|
||||
def get_repository(self):
|
||||
_QUERY = '''
|
||||
_QUERY = """
|
||||
repository(owner: "{owner}" name: "{name}") {{
|
||||
defaultBranchRef {{
|
||||
name
|
||||
@ -78,19 +78,19 @@ class Query:
|
||||
id
|
||||
sshUrl
|
||||
}}
|
||||
'''
|
||||
"""
|
||||
|
||||
query = _QUERY.format(owner=self._owner, name=self._name)
|
||||
return self._run(query)['repository']
|
||||
return self._run(query)["repository"]
|
||||
|
||||
def get_members(self):
|
||||
'''Get all team members for organization
|
||||
"""Get all team members for organization
|
||||
|
||||
Returns:
|
||||
members: a map of members' logins to ids
|
||||
'''
|
||||
"""
|
||||
|
||||
_QUERY = '''
|
||||
_QUERY = """
|
||||
organization(login: "{organization}") {{
|
||||
team(slug: "{team}") {{
|
||||
members(first: {max_page_size} {next}) {{
|
||||
@ -105,43 +105,54 @@ class Query:
|
||||
}}
|
||||
}}
|
||||
}}
|
||||
'''
|
||||
"""
|
||||
|
||||
members = {}
|
||||
not_end = True
|
||||
query = _QUERY.format(organization=self._owner, team=self._team,
|
||||
max_page_size=self._max_page_size,
|
||||
next='')
|
||||
query = _QUERY.format(
|
||||
organization=self._owner,
|
||||
team=self._team,
|
||||
max_page_size=self._max_page_size,
|
||||
next="",
|
||||
)
|
||||
|
||||
while not_end:
|
||||
result = self._run(query)['organization']['team']
|
||||
result = self._run(query)["organization"]["team"]
|
||||
if result is None:
|
||||
break
|
||||
result = result['members']
|
||||
not_end = result['pageInfo']['hasNextPage']
|
||||
query = _QUERY.format(organization=self._owner, team=self._team,
|
||||
max_page_size=self._max_page_size,
|
||||
next='after: "{}"'.format(result["pageInfo"]["endCursor"]))
|
||||
result = result["members"]
|
||||
not_end = result["pageInfo"]["hasNextPage"]
|
||||
query = _QUERY.format(
|
||||
organization=self._owner,
|
||||
team=self._team,
|
||||
max_page_size=self._max_page_size,
|
||||
next='after: "{}"'.format(result["pageInfo"]["endCursor"]),
|
||||
)
|
||||
|
||||
members.update(dict([(node['login'], node['id']) for node in result['nodes']]))
|
||||
members += dict([(node["login"], node["id"]) for node in result["nodes"]])
|
||||
|
||||
return members
|
||||
|
||||
def get_pull_request(self, number):
|
||||
_QUERY = '''
|
||||
_QUERY = """
|
||||
repository(owner: "{owner}" name: "{name}") {{
|
||||
pullRequest(number: {number}) {{
|
||||
{pull_request_data}
|
||||
}}
|
||||
}}
|
||||
'''
|
||||
"""
|
||||
|
||||
query = _QUERY.format(owner=self._owner, name=self._name, number=number,
|
||||
pull_request_data=self._PULL_REQUEST, min_page_size=self._min_page_size)
|
||||
return self._run(query)['repository']['pullRequest']
|
||||
query = _QUERY.format(
|
||||
owner=self._owner,
|
||||
name=self._name,
|
||||
number=number,
|
||||
pull_request_data=self._PULL_REQUEST,
|
||||
min_page_size=self._min_page_size,
|
||||
)
|
||||
return self._run(query)["repository"]["pullRequest"]
|
||||
|
||||
def find_pull_request(self, base, head):
|
||||
_QUERY = '''
|
||||
_QUERY = """
|
||||
repository(owner: "{owner}" name: "{name}") {{
|
||||
pullRequests(first: {min_page_size} baseRefName: "{base}" headRefName: "{head}") {{
|
||||
nodes {{
|
||||
@ -150,21 +161,27 @@ class Query:
|
||||
totalCount
|
||||
}}
|
||||
}}
|
||||
'''
|
||||
"""
|
||||
|
||||
query = _QUERY.format(owner=self._owner, name=self._name, base=base, head=head,
|
||||
pull_request_data=self._PULL_REQUEST, min_page_size=self._min_page_size)
|
||||
result = self._run(query)['repository']['pullRequests']
|
||||
if result['totalCount'] > 0:
|
||||
return result['nodes'][0]
|
||||
query = _QUERY.format(
|
||||
owner=self._owner,
|
||||
name=self._name,
|
||||
base=base,
|
||||
head=head,
|
||||
pull_request_data=self._PULL_REQUEST,
|
||||
min_page_size=self._min_page_size,
|
||||
)
|
||||
result = self._run(query)["repository"]["pullRequests"]
|
||||
if result["totalCount"] > 0:
|
||||
return result["nodes"][0]
|
||||
else:
|
||||
return {}
|
||||
|
||||
def find_pull_requests(self, label_name):
|
||||
'''
|
||||
"""
|
||||
Get all pull-requests filtered by label name
|
||||
'''
|
||||
_QUERY = '''
|
||||
"""
|
||||
_QUERY = """
|
||||
repository(owner: "{owner}" name: "{name}") {{
|
||||
pullRequests(first: {min_page_size} labels: "{label_name}" states: OPEN) {{
|
||||
nodes {{
|
||||
@ -172,18 +189,23 @@ class Query:
|
||||
}}
|
||||
}}
|
||||
}}
|
||||
'''
|
||||
"""
|
||||
|
||||
query = _QUERY.format(owner=self._owner, name=self._name, label_name=label_name,
|
||||
pull_request_data=self._PULL_REQUEST, min_page_size=self._min_page_size)
|
||||
return self._run(query)['repository']['pullRequests']['nodes']
|
||||
query = _QUERY.format(
|
||||
owner=self._owner,
|
||||
name=self._name,
|
||||
label_name=label_name,
|
||||
pull_request_data=self._PULL_REQUEST,
|
||||
min_page_size=self._min_page_size,
|
||||
)
|
||||
return self._run(query)["repository"]["pullRequests"]["nodes"]
|
||||
|
||||
def get_pull_requests(self, before_commit):
|
||||
'''
|
||||
"""
|
||||
Get all merged pull-requests from the HEAD of default branch to the last commit (excluding)
|
||||
'''
|
||||
"""
|
||||
|
||||
_QUERY = '''
|
||||
_QUERY = """
|
||||
repository(owner: "{owner}" name: "{name}") {{
|
||||
defaultBranchRef {{
|
||||
target {{
|
||||
@ -221,44 +243,60 @@ class Query:
|
||||
}}
|
||||
}}
|
||||
}}
|
||||
'''
|
||||
"""
|
||||
|
||||
pull_requests = []
|
||||
not_end = True
|
||||
query = _QUERY.format(owner=self._owner, name=self._name,
|
||||
max_page_size=self._max_page_size,
|
||||
min_page_size=self._min_page_size,
|
||||
pull_request_data=self._PULL_REQUEST,
|
||||
next='')
|
||||
query = _QUERY.format(
|
||||
owner=self._owner,
|
||||
name=self._name,
|
||||
max_page_size=self._max_page_size,
|
||||
min_page_size=self._min_page_size,
|
||||
pull_request_data=self._PULL_REQUEST,
|
||||
next="",
|
||||
)
|
||||
|
||||
while not_end:
|
||||
result = self._run(query)['repository']['defaultBranchRef']['target']['history']
|
||||
not_end = result['pageInfo']['hasNextPage']
|
||||
query = _QUERY.format(owner=self._owner, name=self._name,
|
||||
max_page_size=self._max_page_size,
|
||||
min_page_size=self._min_page_size,
|
||||
pull_request_data=self._PULL_REQUEST,
|
||||
next='after: "{}"'.format(result["pageInfo"]["endCursor"]))
|
||||
result = self._run(query)["repository"]["defaultBranchRef"]["target"][
|
||||
"history"
|
||||
]
|
||||
not_end = result["pageInfo"]["hasNextPage"]
|
||||
query = _QUERY.format(
|
||||
owner=self._owner,
|
||||
name=self._name,
|
||||
max_page_size=self._max_page_size,
|
||||
min_page_size=self._min_page_size,
|
||||
pull_request_data=self._PULL_REQUEST,
|
||||
next='after: "{}"'.format(result["pageInfo"]["endCursor"]),
|
||||
)
|
||||
|
||||
for commit in result['nodes']:
|
||||
for commit in result["nodes"]:
|
||||
# FIXME: maybe include `before_commit`?
|
||||
if str(commit['oid']) == str(before_commit):
|
||||
if str(commit["oid"]) == str(before_commit):
|
||||
not_end = False
|
||||
break
|
||||
|
||||
# TODO: fetch all pull-requests that were merged in a single commit.
|
||||
assert commit['associatedPullRequests']['totalCount'] <= self._min_page_size
|
||||
assert (
|
||||
commit["associatedPullRequests"]["totalCount"]
|
||||
<= self._min_page_size
|
||||
)
|
||||
|
||||
for pull_request in commit['associatedPullRequests']['nodes']:
|
||||
if(pull_request['baseRepository']['nameWithOwner'] == '{}/{}'.format(self._owner, self._name) and
|
||||
pull_request['baseRefName'] == self.default_branch and
|
||||
pull_request['mergeCommit']['oid'] == commit['oid']):
|
||||
for pull_request in commit["associatedPullRequests"]["nodes"]:
|
||||
if (
|
||||
pull_request["baseRepository"]["nameWithOwner"]
|
||||
== "{}/{}".format(self._owner, self._name)
|
||||
and pull_request["baseRefName"] == self.default_branch
|
||||
and pull_request["mergeCommit"]["oid"] == commit["oid"]
|
||||
):
|
||||
pull_requests.append(pull_request)
|
||||
|
||||
return pull_requests
|
||||
|
||||
def create_pull_request(self, source, target, title, description="", draft=False, can_modify=True):
|
||||
_QUERY = '''
|
||||
def create_pull_request(
|
||||
self, source, target, title, description="", draft=False, can_modify=True
|
||||
):
|
||||
_QUERY = """
|
||||
createPullRequest(input: {{
|
||||
baseRefName: "{target}",
|
||||
headRefName: "{source}",
|
||||
@ -272,15 +310,22 @@ class Query:
|
||||
{pull_request_data}
|
||||
}}
|
||||
}}
|
||||
'''
|
||||
"""
|
||||
|
||||
query = _QUERY.format(target=target, source=source, id=self._id, title=title, body=description,
|
||||
draft="true" if draft else "false", modify="true" if can_modify else "false",
|
||||
pull_request_data=self._PULL_REQUEST)
|
||||
return self._run(query, is_mutation=True)['createPullRequest']['pullRequest']
|
||||
query = _QUERY.format(
|
||||
target=target,
|
||||
source=source,
|
||||
id=self._id,
|
||||
title=title,
|
||||
body=description,
|
||||
draft="true" if draft else "false",
|
||||
modify="true" if can_modify else "false",
|
||||
pull_request_data=self._PULL_REQUEST,
|
||||
)
|
||||
return self._run(query, is_mutation=True)["createPullRequest"]["pullRequest"]
|
||||
|
||||
def merge_pull_request(self, id):
|
||||
_QUERY = '''
|
||||
_QUERY = """
|
||||
mergePullRequest(input: {{
|
||||
pullRequestId: "{id}"
|
||||
}}) {{
|
||||
@ -288,35 +333,35 @@ class Query:
|
||||
{pull_request_data}
|
||||
}}
|
||||
}}
|
||||
'''
|
||||
"""
|
||||
|
||||
query = _QUERY.format(id=id, pull_request_data=self._PULL_REQUEST)
|
||||
return self._run(query, is_mutation=True)['mergePullRequest']['pullRequest']
|
||||
return self._run(query, is_mutation=True)["mergePullRequest"]["pullRequest"]
|
||||
|
||||
# FIXME: figure out how to add more assignees at once
|
||||
def add_assignee(self, pr, assignee):
|
||||
_QUERY = '''
|
||||
_QUERY = """
|
||||
addAssigneesToAssignable(input: {{
|
||||
assignableId: "{id1}",
|
||||
assigneeIds: "{id2}"
|
||||
}}) {{
|
||||
clientMutationId
|
||||
}}
|
||||
'''
|
||||
"""
|
||||
|
||||
query = _QUERY.format(id1=pr['id'], id2=assignee['id'])
|
||||
query = _QUERY.format(id1=pr["id"], id2=assignee["id"])
|
||||
self._run(query, is_mutation=True)
|
||||
|
||||
def set_label(self, pull_request, label_name):
|
||||
'''
|
||||
"""
|
||||
Set label by name to the pull request
|
||||
|
||||
Args:
|
||||
pull_request: JSON object returned by `get_pull_requests()`
|
||||
label_name (string): label name
|
||||
'''
|
||||
"""
|
||||
|
||||
_GET_LABEL = '''
|
||||
_GET_LABEL = """
|
||||
repository(owner: "{owner}" name: "{name}") {{
|
||||
labels(first: {max_page_size} {next} query: "{label_name}") {{
|
||||
pageInfo {{
|
||||
@ -330,36 +375,44 @@ class Query:
|
||||
}}
|
||||
}}
|
||||
}}
|
||||
'''
|
||||
"""
|
||||
|
||||
_SET_LABEL = '''
|
||||
_SET_LABEL = """
|
||||
addLabelsToLabelable(input: {{
|
||||
labelableId: "{pr_id}",
|
||||
labelIds: "{label_id}"
|
||||
}}) {{
|
||||
clientMutationId
|
||||
}}
|
||||
'''
|
||||
"""
|
||||
|
||||
labels = []
|
||||
not_end = True
|
||||
query = _GET_LABEL.format(owner=self._owner, name=self._name, label_name=label_name,
|
||||
max_page_size=self._max_page_size,
|
||||
next='')
|
||||
query = _GET_LABEL.format(
|
||||
owner=self._owner,
|
||||
name=self._name,
|
||||
label_name=label_name,
|
||||
max_page_size=self._max_page_size,
|
||||
next="",
|
||||
)
|
||||
|
||||
while not_end:
|
||||
result = self._run(query)['repository']['labels']
|
||||
not_end = result['pageInfo']['hasNextPage']
|
||||
query = _GET_LABEL.format(owner=self._owner, name=self._name, label_name=label_name,
|
||||
max_page_size=self._max_page_size,
|
||||
next='after: "{}"'.format(result["pageInfo"]["endCursor"]))
|
||||
result = self._run(query)["repository"]["labels"]
|
||||
not_end = result["pageInfo"]["hasNextPage"]
|
||||
query = _GET_LABEL.format(
|
||||
owner=self._owner,
|
||||
name=self._name,
|
||||
label_name=label_name,
|
||||
max_page_size=self._max_page_size,
|
||||
next='after: "{}"'.format(result["pageInfo"]["endCursor"]),
|
||||
)
|
||||
|
||||
labels += [label for label in result['nodes']]
|
||||
labels += [label for label in result["nodes"]]
|
||||
|
||||
if not labels:
|
||||
return
|
||||
|
||||
query = _SET_LABEL.format(pr_id=pull_request['id'], label_id=labels[0]['id'])
|
||||
query = _SET_LABEL.format(pr_id=pull_request["id"], label_id=labels[0]["id"])
|
||||
self._run(query, is_mutation=True)
|
||||
|
||||
def _run(self, query, is_mutation=False):
|
||||
@ -385,19 +438,21 @@ class Query:
|
||||
status_forcelist=status_forcelist,
|
||||
)
|
||||
adapter = HTTPAdapter(max_retries=retry)
|
||||
session.mount('http://', adapter)
|
||||
session.mount('https://', adapter)
|
||||
session.mount("http://", adapter)
|
||||
session.mount("https://", adapter)
|
||||
return session
|
||||
|
||||
headers = {'Authorization': 'bearer {}'.format(self._token)}
|
||||
headers = {"Authorization": "bearer {}".format(self._token)}
|
||||
if is_mutation:
|
||||
query = '''
|
||||
query = """
|
||||
mutation {{
|
||||
{query}
|
||||
}}
|
||||
'''.format(query=query)
|
||||
""".format(
|
||||
query=query
|
||||
)
|
||||
else:
|
||||
query = '''
|
||||
query = """
|
||||
query {{
|
||||
{query}
|
||||
rateLimit {{
|
||||
@ -405,23 +460,38 @@ class Query:
|
||||
remaining
|
||||
}}
|
||||
}}
|
||||
'''.format(query=query)
|
||||
""".format(
|
||||
query=query
|
||||
)
|
||||
|
||||
while True:
|
||||
request = requests_retry_session().post('https://api.github.com/graphql', json={'query': query}, headers=headers)
|
||||
request = requests_retry_session().post(
|
||||
"https://api.github.com/graphql", json={"query": query}, headers=headers
|
||||
)
|
||||
if request.status_code == 200:
|
||||
result = request.json()
|
||||
if 'errors' in result:
|
||||
raise Exception('Errors occurred: {}\nOriginal query: {}'.format(result["errors"], query))
|
||||
if "errors" in result:
|
||||
raise Exception(
|
||||
"Errors occurred: {}\nOriginal query: {}".format(
|
||||
result["errors"], query
|
||||
)
|
||||
)
|
||||
|
||||
if not is_mutation:
|
||||
import inspect
|
||||
|
||||
caller = inspect.getouterframes(inspect.currentframe(), 2)[1][3]
|
||||
if caller not in list(self.api_costs.keys()):
|
||||
self.api_costs[caller] = 0
|
||||
self.api_costs[caller] += result['data']['rateLimit']['cost']
|
||||
self.api_costs[caller] += result["data"]["rateLimit"]["cost"]
|
||||
|
||||
return result['data']
|
||||
return result["data"]
|
||||
else:
|
||||
import json
|
||||
raise Exception('Query failed with code {code}:\n{json}'.format(code=request.status_code, json=json.dumps(request.json(), indent=4)))
|
||||
|
||||
raise Exception(
|
||||
"Query failed with code {code}:\n{json}".format(
|
||||
code=request.status_code,
|
||||
json=json.dumps(request.json(), indent=4),
|
||||
)
|
||||
)
|
||||
|
@ -6,6 +6,7 @@ import json
|
||||
import requests # type: ignore
|
||||
from get_robot_token import get_parameter_from_ssm
|
||||
|
||||
|
||||
class ClickHouseHelper:
|
||||
def __init__(self, url=None, user=None, password=None):
|
||||
self.url2 = None
|
||||
@ -15,27 +16,35 @@ class ClickHouseHelper:
|
||||
url = get_parameter_from_ssm("clickhouse-test-stat-url")
|
||||
self.url2 = get_parameter_from_ssm("clickhouse-test-stat-url2")
|
||||
self.auth2 = {
|
||||
'X-ClickHouse-User': get_parameter_from_ssm("clickhouse-test-stat-login2"),
|
||||
'X-ClickHouse-Key': ''
|
||||
"X-ClickHouse-User": get_parameter_from_ssm(
|
||||
"clickhouse-test-stat-login2"
|
||||
),
|
||||
"X-ClickHouse-Key": "",
|
||||
}
|
||||
|
||||
self.url = url
|
||||
self.auth = {
|
||||
'X-ClickHouse-User': user if user is not None else get_parameter_from_ssm("clickhouse-test-stat-login"),
|
||||
'X-ClickHouse-Key': password if password is not None else get_parameter_from_ssm("clickhouse-test-stat-password")
|
||||
"X-ClickHouse-User": user
|
||||
if user is not None
|
||||
else get_parameter_from_ssm("clickhouse-test-stat-login"),
|
||||
"X-ClickHouse-Key": password
|
||||
if password is not None
|
||||
else get_parameter_from_ssm("clickhouse-test-stat-password"),
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _insert_json_str_info_impl(url, auth, db, table, json_str):
|
||||
params = {
|
||||
'database': db,
|
||||
'query': 'INSERT INTO {table} FORMAT JSONEachRow'.format(table=table),
|
||||
'date_time_input_format': 'best_effort',
|
||||
'send_logs_level': 'warning',
|
||||
"database": db,
|
||||
"query": "INSERT INTO {table} FORMAT JSONEachRow".format(table=table),
|
||||
"date_time_input_format": "best_effort",
|
||||
"send_logs_level": "warning",
|
||||
}
|
||||
|
||||
for i in range(5):
|
||||
response = requests.post(url, params=params, data=json_str, headers=auth, verify=False)
|
||||
response = requests.post(
|
||||
url, params=params, data=json_str, headers=auth, verify=False
|
||||
)
|
||||
|
||||
logging.info("Response content '%s'", response.content)
|
||||
|
||||
@ -43,16 +52,25 @@ class ClickHouseHelper:
|
||||
break
|
||||
|
||||
error = (
|
||||
"Cannot insert data into clickhouse at try " + str(i)
|
||||
+ ": HTTP code " + str(response.status_code) + ": '"
|
||||
+ str(response.text) + "'")
|
||||
"Cannot insert data into clickhouse at try "
|
||||
+ str(i)
|
||||
+ ": HTTP code "
|
||||
+ str(response.status_code)
|
||||
+ ": '"
|
||||
+ str(response.text)
|
||||
+ "'"
|
||||
)
|
||||
|
||||
if response.status_code >= 500:
|
||||
# A retriable error
|
||||
time.sleep(1)
|
||||
continue
|
||||
|
||||
logging.info("Request headers '%s', body '%s'", response.request.headers, response.request.body)
|
||||
logging.info(
|
||||
"Request headers '%s', body '%s'",
|
||||
response.request.headers,
|
||||
response.request.body,
|
||||
)
|
||||
|
||||
raise Exception(error)
|
||||
else:
|
||||
@ -72,18 +90,20 @@ class ClickHouseHelper:
|
||||
for event in events:
|
||||
jsons.append(json.dumps(event))
|
||||
|
||||
self._insert_json_str_info(db, table, ','.join(jsons))
|
||||
self._insert_json_str_info(db, table, ",".join(jsons))
|
||||
|
||||
def _select_and_get_json_each_row(self, db, query):
|
||||
params = {
|
||||
'database': db,
|
||||
'query': query,
|
||||
'default_format': 'JSONEachRow',
|
||||
"database": db,
|
||||
"query": query,
|
||||
"default_format": "JSONEachRow",
|
||||
}
|
||||
for i in range(5):
|
||||
response = None
|
||||
try:
|
||||
response = requests.get(self.url, params=params, headers=self.auth, verify=False)
|
||||
response = requests.get(
|
||||
self.url, params=params, headers=self.auth, verify=False
|
||||
)
|
||||
response.raise_for_status()
|
||||
return response.text
|
||||
except Exception as ex:
|
||||
@ -97,15 +117,21 @@ class ClickHouseHelper:
|
||||
def select_json_each_row(self, db, query):
|
||||
text = self._select_and_get_json_each_row(db, query)
|
||||
result = []
|
||||
for line in text.split('\n'):
|
||||
for line in text.split("\n"):
|
||||
if line:
|
||||
result.append(json.loads(line))
|
||||
return result
|
||||
|
||||
|
||||
def prepare_tests_results_for_clickhouse(
|
||||
pr_info, test_results,
|
||||
check_status, check_duration, check_start_time,
|
||||
report_url, check_name):
|
||||
pr_info,
|
||||
test_results,
|
||||
check_status,
|
||||
check_duration,
|
||||
check_start_time,
|
||||
report_url,
|
||||
check_name,
|
||||
):
|
||||
|
||||
pull_request_url = "https://github.com/ClickHouse/ClickHouse/commits/master"
|
||||
base_ref = "master"
|
||||
@ -147,13 +173,14 @@ def prepare_tests_results_for_clickhouse(
|
||||
test_time = 0
|
||||
if len(test_result) > 2 and test_result[2]:
|
||||
test_time = test_result[2]
|
||||
current_row['test_duration_ms'] = int(float(test_time) * 1000)
|
||||
current_row['test_name'] = test_name
|
||||
current_row['test_status'] = test_status
|
||||
current_row["test_duration_ms"] = int(float(test_time) * 1000)
|
||||
current_row["test_name"] = test_name
|
||||
current_row["test_status"] = test_status
|
||||
result.append(current_row)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def mark_flaky_tests(clickhouse_helper, check_name, test_results):
|
||||
try:
|
||||
query = """
|
||||
@ -164,14 +191,16 @@ def mark_flaky_tests(clickhouse_helper, check_name, test_results):
|
||||
AND check_name = '{check_name}'
|
||||
AND (test_status = 'FAIL' OR test_status = 'FLAKY')
|
||||
AND pull_request_number = 0
|
||||
""".format(check_name=check_name)
|
||||
""".format(
|
||||
check_name=check_name
|
||||
)
|
||||
|
||||
tests_data = clickhouse_helper.select_json_each_row('gh-data', query)
|
||||
master_failed_tests = {row['test_name'] for row in tests_data}
|
||||
logging.info("Found flaky tests: %s", ', '.join(master_failed_tests))
|
||||
tests_data = clickhouse_helper.select_json_each_row("gh-data", query)
|
||||
master_failed_tests = {row["test_name"] for row in tests_data}
|
||||
logging.info("Found flaky tests: %s", ", ".join(master_failed_tests))
|
||||
|
||||
for test_result in test_results:
|
||||
if test_result[1] == 'FAIL' and test_result[0] in master_failed_tests:
|
||||
test_result[1] = 'FLAKY'
|
||||
if test_result[1] == "FAIL" and test_result[0] in master_failed_tests:
|
||||
test_result[1] = "FLAKY"
|
||||
except Exception as ex:
|
||||
logging.info("Exception happened during flaky tests fetch %s", ex)
|
||||
|
@ -18,13 +18,16 @@ from tee_popen import TeePopen
|
||||
|
||||
NAME = "Woboq Build (actions)"
|
||||
|
||||
|
||||
def get_run_command(repo_path, output_path, image):
|
||||
cmd = "docker run " + \
|
||||
f"--volume={repo_path}:/repo_folder " \
|
||||
f"--volume={output_path}:/test_output " \
|
||||
f"-e 'DATA=https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/data' {image}"
|
||||
cmd = (
|
||||
"docker run " + f"--volume={repo_path}:/repo_folder "
|
||||
f"--volume={output_path}:/test_output "
|
||||
f"-e 'DATA=https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/data' {image}"
|
||||
)
|
||||
return cmd
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
@ -37,8 +40,8 @@ if __name__ == "__main__":
|
||||
if not os.path.exists(temp_path):
|
||||
os.makedirs(temp_path)
|
||||
|
||||
docker_image = get_image_with_version(IMAGES_PATH, 'clickhouse/codebrowser')
|
||||
s3_helper = S3Helper('https://s3.amazonaws.com')
|
||||
docker_image = get_image_with_version(IMAGES_PATH, "clickhouse/codebrowser")
|
||||
s3_helper = S3Helper("https://s3.amazonaws.com")
|
||||
|
||||
result_path = os.path.join(temp_path, "result_path")
|
||||
if not os.path.exists(result_path):
|
||||
@ -62,14 +65,20 @@ if __name__ == "__main__":
|
||||
report_path = os.path.join(result_path, "html_report")
|
||||
logging.info("Report path %s", report_path)
|
||||
s3_path_prefix = "codebrowser"
|
||||
html_urls = s3_helper.fast_parallel_upload_dir(report_path, s3_path_prefix, 'clickhouse-test-reports')
|
||||
html_urls = s3_helper.fast_parallel_upload_dir(
|
||||
report_path, s3_path_prefix, "clickhouse-test-reports"
|
||||
)
|
||||
|
||||
index_html = '<a href="https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/index.html">HTML report</a>'
|
||||
|
||||
test_results = [(index_html, "Look at the report")]
|
||||
|
||||
report_url = upload_results(s3_helper, 0, os.getenv("GITHUB_SHA"), test_results, [], NAME)
|
||||
report_url = upload_results(
|
||||
s3_helper, 0, os.getenv("GITHUB_SHA"), test_results, [], NAME
|
||||
)
|
||||
|
||||
print(f"::notice ::Report url: {report_url}")
|
||||
|
||||
post_commit_status(gh, os.getenv("GITHUB_SHA"), NAME, "Report built", "success", report_url)
|
||||
post_commit_status(
|
||||
gh, os.getenv("GITHUB_SHA"), NAME, "Report built", "success", report_url
|
||||
)
|
||||
|
@ -14,9 +14,9 @@ def override_status(status, check_name, invert=False):
|
||||
return "success"
|
||||
|
||||
if invert:
|
||||
if status == 'success':
|
||||
return 'error'
|
||||
return 'success'
|
||||
if status == "success":
|
||||
return "error"
|
||||
return "success"
|
||||
|
||||
return status
|
||||
|
||||
@ -56,6 +56,6 @@ def post_commit_status(gh, sha, check_name, description, state, report_url):
|
||||
def post_commit_status_to_file(file_path, description, state, report_url):
|
||||
if os.path.exists(file_path):
|
||||
raise Exception(f'File "{file_path}" already exists!')
|
||||
with open(file_path, 'w', encoding='utf-8') as f:
|
||||
out = csv.writer(f, delimiter='\t')
|
||||
with open(file_path, "w", encoding="utf-8") as f:
|
||||
out = csv.writer(f, delimiter="\t")
|
||||
out.writerow([state, report_url, description])
|
||||
|
@ -16,34 +16,40 @@ from build_download_helper import download_builds_filter
|
||||
from upload_result_helper import upload_results
|
||||
from docker_pull_helper import get_images_with_versions
|
||||
from commit_status_helper import post_commit_status
|
||||
from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse
|
||||
from clickhouse_helper import (
|
||||
ClickHouseHelper,
|
||||
mark_flaky_tests,
|
||||
prepare_tests_results_for_clickhouse,
|
||||
)
|
||||
from stopwatch import Stopwatch
|
||||
from rerun_helper import RerunHelper
|
||||
|
||||
IMAGE_UBUNTU = "clickhouse/test-old-ubuntu"
|
||||
IMAGE_CENTOS = "clickhouse/test-old-centos"
|
||||
MAX_GLIBC_VERSION = '2.4'
|
||||
MAX_GLIBC_VERSION = "2.4"
|
||||
DOWNLOAD_RETRIES_COUNT = 5
|
||||
CHECK_NAME = "Compatibility check (actions)"
|
||||
|
||||
|
||||
def process_os_check(log_path):
|
||||
name = os.path.basename(log_path)
|
||||
with open(log_path, 'r') as log:
|
||||
line = log.read().split('\n')[0].strip()
|
||||
if line != 'OK':
|
||||
with open(log_path, "r") as log:
|
||||
line = log.read().split("\n")[0].strip()
|
||||
if line != "OK":
|
||||
return (name, "FAIL")
|
||||
else:
|
||||
return (name, "OK")
|
||||
|
||||
|
||||
def process_glibc_check(log_path):
|
||||
bad_lines = []
|
||||
with open(log_path, 'r') as log:
|
||||
with open(log_path, "r") as log:
|
||||
for line in log:
|
||||
if line.strip():
|
||||
columns = line.strip().split(' ')
|
||||
columns = line.strip().split(" ")
|
||||
symbol_with_glibc = columns[-2] # sysconf@GLIBC_2.2.5
|
||||
_, version = symbol_with_glibc.split('@GLIBC_')
|
||||
if version == 'PRIVATE':
|
||||
_, version = symbol_with_glibc.split("@GLIBC_")
|
||||
if version == "PRIVATE":
|
||||
bad_lines.append((symbol_with_glibc, "FAIL"))
|
||||
elif StrictVersion(version) > MAX_GLIBC_VERSION:
|
||||
bad_lines.append((symbol_with_glibc, "FAIL"))
|
||||
@ -51,6 +57,7 @@ def process_glibc_check(log_path):
|
||||
bad_lines.append(("glibc check", "OK"))
|
||||
return bad_lines
|
||||
|
||||
|
||||
def process_result(result_folder, server_log_folder):
|
||||
summary = process_glibc_check(os.path.join(result_folder, "glibc.log"))
|
||||
|
||||
@ -86,16 +93,18 @@ def process_result(result_folder, server_log_folder):
|
||||
return status, description, summary, result_logs
|
||||
|
||||
|
||||
def get_run_commands(build_path, result_folder, server_log_folder, image_centos, image_ubuntu):
|
||||
def get_run_commands(
|
||||
build_path, result_folder, server_log_folder, image_centos, image_ubuntu
|
||||
):
|
||||
return [
|
||||
f"readelf -s {build_path}/usr/bin/clickhouse | grep '@GLIBC_' > {result_folder}/glibc.log",
|
||||
f"readelf -s {build_path}/usr/bin/clickhouse-odbc-bridge | grep '@GLIBC_' >> {result_folder}/glibc.log",
|
||||
f"docker run --network=host --volume={build_path}/usr/bin/clickhouse:/clickhouse " \
|
||||
f"--volume={build_path}/etc/clickhouse-server:/config " \
|
||||
f"--volume={server_log_folder}:/var/log/clickhouse-server {image_ubuntu} > {result_folder}/ubuntu:12.04",
|
||||
f"docker run --network=host --volume={build_path}/usr/bin/clickhouse:/clickhouse " \
|
||||
f"--volume={build_path}/etc/clickhouse-server:/config " \
|
||||
f"--volume={server_log_folder}:/var/log/clickhouse-server {image_centos} > {result_folder}/centos:5",
|
||||
f"docker run --network=host --volume={build_path}/usr/bin/clickhouse:/clickhouse "
|
||||
f"--volume={build_path}/etc/clickhouse-server:/config "
|
||||
f"--volume={server_log_folder}:/var/log/clickhouse-server {image_ubuntu} > {result_folder}/ubuntu:12.04",
|
||||
f"docker run --network=host --volume={build_path}/usr/bin/clickhouse:/clickhouse "
|
||||
f"--volume={build_path}/etc/clickhouse-server:/config "
|
||||
f"--volume={server_log_folder}:/var/log/clickhouse-server {image_centos} > {result_folder}/centos:5",
|
||||
]
|
||||
|
||||
|
||||
@ -124,14 +133,18 @@ if __name__ == "__main__":
|
||||
os.makedirs(packages_path)
|
||||
|
||||
def url_filter(url):
|
||||
return url.endswith('.deb') and ('clickhouse-common-static_' in url or 'clickhouse-server_' in url)
|
||||
return url.endswith(".deb") and (
|
||||
"clickhouse-common-static_" in url or "clickhouse-server_" in url
|
||||
)
|
||||
|
||||
download_builds_filter(CHECK_NAME, reports_path, packages_path, url_filter)
|
||||
|
||||
for f in os.listdir(packages_path):
|
||||
if '.deb' in f:
|
||||
if ".deb" in f:
|
||||
full_path = os.path.join(packages_path, f)
|
||||
subprocess.check_call(f"dpkg -x {full_path} {packages_path} && rm {full_path}", shell=True)
|
||||
subprocess.check_call(
|
||||
f"dpkg -x {full_path} {packages_path} && rm {full_path}", shell=True
|
||||
)
|
||||
|
||||
server_log_path = os.path.join(temp_path, "server_log")
|
||||
if not os.path.exists(server_log_path):
|
||||
@ -141,7 +154,9 @@ if __name__ == "__main__":
|
||||
if not os.path.exists(result_path):
|
||||
os.makedirs(result_path)
|
||||
|
||||
run_commands = get_run_commands(packages_path, result_path, server_log_path, docker_images[0], docker_images[1])
|
||||
run_commands = get_run_commands(
|
||||
packages_path, result_path, server_log_path, docker_images[0], docker_images[1]
|
||||
)
|
||||
|
||||
state = "success"
|
||||
for run_command in run_commands:
|
||||
@ -154,15 +169,32 @@ if __name__ == "__main__":
|
||||
|
||||
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
|
||||
|
||||
s3_helper = S3Helper('https://s3.amazonaws.com')
|
||||
state, description, test_results, additional_logs = process_result(result_path, server_log_path)
|
||||
s3_helper = S3Helper("https://s3.amazonaws.com")
|
||||
state, description, test_results, additional_logs = process_result(
|
||||
result_path, server_log_path
|
||||
)
|
||||
|
||||
ch_helper = ClickHouseHelper()
|
||||
mark_flaky_tests(ch_helper, CHECK_NAME, test_results)
|
||||
|
||||
report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_logs, CHECK_NAME)
|
||||
report_url = upload_results(
|
||||
s3_helper,
|
||||
pr_info.number,
|
||||
pr_info.sha,
|
||||
test_results,
|
||||
additional_logs,
|
||||
CHECK_NAME,
|
||||
)
|
||||
print(f"::notice ::Report url: {report_url}")
|
||||
post_commit_status(gh, pr_info.sha, CHECK_NAME, description, state, report_url)
|
||||
|
||||
prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, CHECK_NAME)
|
||||
prepared_events = prepare_tests_results_for_clickhouse(
|
||||
pr_info,
|
||||
test_results,
|
||||
state,
|
||||
stopwatch.duration_seconds,
|
||||
stopwatch.start_time_str,
|
||||
report_url,
|
||||
CHECK_NAME,
|
||||
)
|
||||
ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
|
||||
|
@ -3,20 +3,21 @@ import subprocess
|
||||
import logging
|
||||
import os
|
||||
|
||||
|
||||
def compress_file_fast(path, archive_path):
|
||||
if os.path.exists('/usr/bin/pigz'):
|
||||
if os.path.exists("/usr/bin/pigz"):
|
||||
subprocess.check_call("pigz < {} > {}".format(path, archive_path), shell=True)
|
||||
else:
|
||||
subprocess.check_call("gzip < {} > {}".format(path, archive_path), shell=True)
|
||||
|
||||
|
||||
def compress_fast(path, archive_path, exclude=None):
|
||||
pigz_part = ''
|
||||
if os.path.exists('/usr/bin/pigz'):
|
||||
pigz_part = ""
|
||||
if os.path.exists("/usr/bin/pigz"):
|
||||
logging.info("pigz found, will compress and decompress faster")
|
||||
pigz_part = "--use-compress-program='pigz'"
|
||||
else:
|
||||
pigz_part = '-z'
|
||||
pigz_part = "-z"
|
||||
logging.info("no pigz, compressing with default tar")
|
||||
|
||||
if exclude is None:
|
||||
@ -31,21 +32,36 @@ def compress_fast(path, archive_path, exclude=None):
|
||||
path = os.path.dirname(path)
|
||||
else:
|
||||
path += "/.."
|
||||
cmd = "tar {} {} -cf {} -C {} {}".format(pigz_part, exclude_part, archive_path, path, fname)
|
||||
cmd = "tar {} {} -cf {} -C {} {}".format(
|
||||
pigz_part, exclude_part, archive_path, path, fname
|
||||
)
|
||||
logging.debug("compress_fast cmd: %s", cmd)
|
||||
subprocess.check_call(cmd, shell=True)
|
||||
|
||||
|
||||
def decompress_fast(archive_path, result_path=None):
|
||||
pigz_part = ''
|
||||
if os.path.exists('/usr/bin/pigz'):
|
||||
logging.info("pigz found, will compress and decompress faster ('%s' -> '%s')", archive_path, result_path)
|
||||
pigz_part = ""
|
||||
if os.path.exists("/usr/bin/pigz"):
|
||||
logging.info(
|
||||
"pigz found, will compress and decompress faster ('%s' -> '%s')",
|
||||
archive_path,
|
||||
result_path,
|
||||
)
|
||||
pigz_part = "--use-compress-program='pigz'"
|
||||
else:
|
||||
pigz_part = '-z'
|
||||
logging.info("no pigz, decompressing with default tar ('%s' -> '%s')", archive_path, result_path)
|
||||
pigz_part = "-z"
|
||||
logging.info(
|
||||
"no pigz, decompressing with default tar ('%s' -> '%s')",
|
||||
archive_path,
|
||||
result_path,
|
||||
)
|
||||
|
||||
if result_path is None:
|
||||
subprocess.check_call("tar {} -xf {}".format(pigz_part, archive_path), shell=True)
|
||||
subprocess.check_call(
|
||||
"tar {} -xf {}".format(pigz_part, archive_path), shell=True
|
||||
)
|
||||
else:
|
||||
subprocess.check_call("tar {} -xf {} -C {}".format(pigz_part, archive_path, result_path), shell=True)
|
||||
subprocess.check_call(
|
||||
"tar {} -xf {} -C {}".format(pigz_part, archive_path, result_path),
|
||||
shell=True,
|
||||
)
|
||||
|
@ -8,23 +8,27 @@ import logging
|
||||
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class DockerImage:
|
||||
def __init__(self, name, version : Optional[str] = None):
|
||||
def __init__(self, name, version: Optional[str] = None):
|
||||
self.name = name
|
||||
if version is None:
|
||||
self.version = 'latest'
|
||||
self.version = "latest"
|
||||
else:
|
||||
self.version = version
|
||||
|
||||
def __str__(self):
|
||||
return f"{self.name}:{self.version}"
|
||||
|
||||
def get_images_with_versions(reports_path, required_image, pull=True, version : Optional[str] = None):
|
||||
|
||||
def get_images_with_versions(
|
||||
reports_path, required_image, pull=True, version: Optional[str] = None
|
||||
):
|
||||
images_path = None
|
||||
for root, _, files in os.walk(reports_path):
|
||||
for f in files:
|
||||
if f == 'changed_images.json':
|
||||
images_path = os.path.join(root, 'changed_images.json')
|
||||
if f == "changed_images.json":
|
||||
images_path = os.path.join(root, "changed_images.json")
|
||||
break
|
||||
|
||||
if not images_path:
|
||||
@ -34,7 +38,7 @@ def get_images_with_versions(reports_path, required_image, pull=True, version :
|
||||
|
||||
if images_path is not None and os.path.exists(images_path):
|
||||
logging.info("Images file exists")
|
||||
with open(images_path, 'r', encoding='utf-8') as images_fd:
|
||||
with open(images_path, "r", encoding="utf-8") as images_fd:
|
||||
images = json.load(images_fd)
|
||||
logging.info("Got images %s", images)
|
||||
else:
|
||||
@ -52,15 +56,22 @@ def get_images_with_versions(reports_path, required_image, pull=True, version :
|
||||
for i in range(10):
|
||||
try:
|
||||
logging.info("Pulling image %s", docker_image)
|
||||
latest_error = subprocess.check_output(f"docker pull {docker_image}", stderr=subprocess.STDOUT, shell=True)
|
||||
latest_error = subprocess.check_output(
|
||||
f"docker pull {docker_image}",
|
||||
stderr=subprocess.STDOUT,
|
||||
shell=True,
|
||||
)
|
||||
break
|
||||
except Exception as ex:
|
||||
time.sleep(i * 3)
|
||||
logging.info("Got execption pulling docker %s", ex)
|
||||
else:
|
||||
raise Exception(f"Cannot pull dockerhub for image docker pull {docker_image} because of {latest_error}")
|
||||
raise Exception(
|
||||
f"Cannot pull dockerhub for image docker pull {docker_image} because of {latest_error}"
|
||||
)
|
||||
|
||||
return docker_images
|
||||
|
||||
|
||||
def get_image_with_version(reports_path, image, pull=True, version=None):
|
||||
return get_images_with_versions(reports_path, [image], pull, version=version)[0]
|
||||
|
@ -40,7 +40,9 @@ if __name__ == "__main__":
|
||||
if not pr_info.has_changes_in_documentation():
|
||||
logging.info("No changes in documentation")
|
||||
commit = get_commit(gh, pr_info.sha)
|
||||
commit.create_status(context=NAME, description="No changes in docs", state="success")
|
||||
commit.create_status(
|
||||
context=NAME, description="No changes in docs", state="success"
|
||||
)
|
||||
sys.exit(0)
|
||||
|
||||
logging.info("Has changes in docs")
|
||||
@ -48,15 +50,15 @@ if __name__ == "__main__":
|
||||
if not os.path.exists(temp_path):
|
||||
os.makedirs(temp_path)
|
||||
|
||||
docker_image = get_image_with_version(temp_path, 'clickhouse/docs-check')
|
||||
docker_image = get_image_with_version(temp_path, "clickhouse/docs-check")
|
||||
|
||||
test_output = os.path.join(temp_path, 'docs_check_log')
|
||||
test_output = os.path.join(temp_path, "docs_check_log")
|
||||
if not os.path.exists(test_output):
|
||||
os.makedirs(test_output)
|
||||
|
||||
cmd = f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/repo_path --volume={test_output}:/output_path {docker_image}"
|
||||
|
||||
run_log_path = os.path.join(test_output, 'runlog.log')
|
||||
run_log_path = os.path.join(test_output, "runlog.log")
|
||||
logging.info("Running command: '%s'", cmd)
|
||||
|
||||
with TeePopen(cmd, run_log_path) as process:
|
||||
@ -82,10 +84,10 @@ if __name__ == "__main__":
|
||||
for f in files:
|
||||
path = os.path.join(test_output, f)
|
||||
additional_files.append(path)
|
||||
with open(path, 'r', encoding='utf-8') as check_file:
|
||||
with open(path, "r", encoding="utf-8") as check_file:
|
||||
for line in check_file:
|
||||
if "ERROR" in line:
|
||||
lines.append((line.split(':')[-1], "FAIL"))
|
||||
lines.append((line.split(":")[-1], "FAIL"))
|
||||
if lines:
|
||||
status = "failure"
|
||||
description = "Found errors in docs"
|
||||
@ -94,12 +96,22 @@ if __name__ == "__main__":
|
||||
else:
|
||||
lines.append(("Non zero exit code", "FAIL"))
|
||||
|
||||
s3_helper = S3Helper('https://s3.amazonaws.com')
|
||||
s3_helper = S3Helper("https://s3.amazonaws.com")
|
||||
ch_helper = ClickHouseHelper()
|
||||
|
||||
report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, lines, additional_files, NAME)
|
||||
report_url = upload_results(
|
||||
s3_helper, pr_info.number, pr_info.sha, lines, additional_files, NAME
|
||||
)
|
||||
print("::notice ::Report url: {report_url}")
|
||||
post_commit_status(gh, pr_info.sha, NAME, description, status, report_url)
|
||||
|
||||
prepared_events = prepare_tests_results_for_clickhouse(pr_info, lines, status, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, NAME)
|
||||
prepared_events = prepare_tests_results_for_clickhouse(
|
||||
pr_info,
|
||||
lines,
|
||||
status,
|
||||
stopwatch.duration_seconds,
|
||||
stopwatch.start_time_str,
|
||||
report_url,
|
||||
NAME,
|
||||
)
|
||||
ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
|
||||
|
@ -34,19 +34,23 @@ if __name__ == "__main__":
|
||||
if not os.path.exists(temp_path):
|
||||
os.makedirs(temp_path)
|
||||
|
||||
docker_image = get_image_with_version(temp_path, 'clickhouse/docs-release')
|
||||
docker_image = get_image_with_version(temp_path, "clickhouse/docs-release")
|
||||
|
||||
test_output = os.path.join(temp_path, 'docs_release_log')
|
||||
test_output = os.path.join(temp_path, "docs_release_log")
|
||||
if not os.path.exists(test_output):
|
||||
os.makedirs(test_output)
|
||||
|
||||
token = CLOUDFLARE_TOKEN
|
||||
cmd = "docker run --cap-add=SYS_PTRACE --volume=$SSH_AUTH_SOCK:/ssh-agent -e SSH_AUTH_SOCK=/ssh-agent " \
|
||||
f"-e CLOUDFLARE_TOKEN={token} --volume={repo_path}:/repo_path --volume={test_output}:/output_path {docker_image}"
|
||||
cmd = (
|
||||
"docker run --cap-add=SYS_PTRACE --volume=$SSH_AUTH_SOCK:/ssh-agent -e SSH_AUTH_SOCK=/ssh-agent "
|
||||
f"-e CLOUDFLARE_TOKEN={token} --volume={repo_path}:/repo_path --volume={test_output}:/output_path {docker_image}"
|
||||
)
|
||||
|
||||
run_log_path = os.path.join(test_output, 'runlog.log')
|
||||
run_log_path = os.path.join(test_output, "runlog.log")
|
||||
|
||||
with open(run_log_path, 'w', encoding='utf-8') as log, SSHKey("ROBOT_CLICKHOUSE_SSH_KEY"):
|
||||
with open(run_log_path, "w", encoding="utf-8") as log, SSHKey(
|
||||
"ROBOT_CLICKHOUSE_SSH_KEY"
|
||||
):
|
||||
with subprocess.Popen(cmd, shell=True, stderr=log, stdout=log) as process:
|
||||
retcode = process.wait()
|
||||
if retcode == 0:
|
||||
@ -70,10 +74,10 @@ if __name__ == "__main__":
|
||||
for f in files:
|
||||
path = os.path.join(test_output, f)
|
||||
additional_files.append(path)
|
||||
with open(path, 'r', encoding='utf-8') as check_file:
|
||||
with open(path, "r", encoding="utf-8") as check_file:
|
||||
for line in check_file:
|
||||
if "ERROR" in line:
|
||||
lines.append((line.split(':')[-1], "FAIL"))
|
||||
lines.append((line.split(":")[-1], "FAIL"))
|
||||
if lines:
|
||||
status = "failure"
|
||||
description = "Found errors in docs"
|
||||
@ -82,9 +86,13 @@ if __name__ == "__main__":
|
||||
else:
|
||||
lines.append(("Non zero exit code", "FAIL"))
|
||||
|
||||
s3_helper = S3Helper('https://s3.amazonaws.com')
|
||||
s3_helper = S3Helper("https://s3.amazonaws.com")
|
||||
|
||||
report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, lines, additional_files, NAME)
|
||||
report_url = upload_results(
|
||||
s3_helper, pr_info.number, pr_info.sha, lines, additional_files, NAME
|
||||
)
|
||||
print("::notice ::Report url: {report_url}")
|
||||
commit = get_commit(gh, pr_info.sha)
|
||||
commit.create_status(context=NAME, description=description, state=status, target_url=report_url)
|
||||
commit.create_status(
|
||||
context=NAME, description=description, state=status, target_url=report_url
|
||||
)
|
||||
|
@ -22,7 +22,9 @@ CLICKHOUSE_CLIENT_DOWNLOAD_URL = "https://github.com/ClickHouse/ClickHouse/relea
|
||||
|
||||
|
||||
CLICKHOUSE_COMMON_STATIC_PACKET_NAME = "clickhouse-common-static_{version}_amd64.deb"
|
||||
CLICKHOUSE_COMMON_STATIC_DBG_PACKET_NAME = "clickhouse-common-static-dbg_{version}_amd64.deb"
|
||||
CLICKHOUSE_COMMON_STATIC_DBG_PACKET_NAME = (
|
||||
"clickhouse-common-static-dbg_{version}_amd64.deb"
|
||||
)
|
||||
CLICKHOUSE_SERVER_PACKET_NAME = "clickhouse-server_{version}_all.deb"
|
||||
CLICKHOUSE_CLIENT_PACKET_NAME = "clickhouse-client_{version}_all.deb"
|
||||
|
||||
@ -35,7 +37,9 @@ class Version:
|
||||
self.version = version
|
||||
|
||||
def __lt__(self, other):
|
||||
return list(map(int, self.version.split('.'))) < list(map(int, other.version.split('.')))
|
||||
return list(map(int, self.version.split("."))) < list(
|
||||
map(int, other.version.split("."))
|
||||
)
|
||||
|
||||
def __str__(self):
|
||||
return self.version
|
||||
@ -49,6 +53,7 @@ class ReleaseInfo:
|
||||
def __repr__(self):
|
||||
return f"ReleaseInfo: {self.version}-{self.type}"
|
||||
|
||||
|
||||
def find_previous_release(server_version, releases):
|
||||
releases.sort(key=lambda x: x.version, reverse=True)
|
||||
|
||||
@ -66,15 +71,26 @@ def get_previous_release(server_version=None):
|
||||
page = 1
|
||||
found = False
|
||||
while not found:
|
||||
response = requests.get(CLICKHOUSE_TAGS_URL, {'page': page, 'per_page': 100})
|
||||
response = requests.get(CLICKHOUSE_TAGS_URL, {"page": page, "per_page": 100})
|
||||
if not response.ok:
|
||||
raise Exception('Cannot load the list of tags from github: ' + response.reason)
|
||||
raise Exception(
|
||||
"Cannot load the list of tags from github: " + response.reason
|
||||
)
|
||||
|
||||
releases_str = set(re.findall(VERSION_PATTERN, response.text))
|
||||
if len(releases_str) == 0:
|
||||
raise Exception('Cannot find previous release for ' + str(server_version) + ' server version')
|
||||
raise Exception(
|
||||
"Cannot find previous release for "
|
||||
+ str(server_version)
|
||||
+ " server version"
|
||||
)
|
||||
|
||||
releases = list(map(lambda x: ReleaseInfo(Version(x.split('-')[0]), x.split('-')[1]), releases_str))
|
||||
releases = list(
|
||||
map(
|
||||
lambda x: ReleaseInfo(Version(x.split("-")[0]), x.split("-")[1]),
|
||||
releases_str,
|
||||
)
|
||||
)
|
||||
found, previous_release = find_previous_release(server_version, releases)
|
||||
page += 1
|
||||
|
||||
@ -87,34 +103,53 @@ def download_packet(url, out_path):
|
||||
"""
|
||||
|
||||
response = requests.get(url)
|
||||
logging.info('Downloading %s', url)
|
||||
logging.info("Downloading %s", url)
|
||||
if response.ok:
|
||||
open(out_path, 'wb').write(response.content)
|
||||
open(out_path, "wb").write(response.content)
|
||||
|
||||
|
||||
def download_packets(release, dest_path=PACKETS_DIR):
|
||||
if not os.path.exists(dest_path):
|
||||
os.makedirs(dest_path)
|
||||
|
||||
logging.info('Will download %s', release)
|
||||
logging.info("Will download %s", release)
|
||||
|
||||
download_packet(
|
||||
CLICKHOUSE_COMMON_STATIC_DOWNLOAD_URL.format(version=release.version, type=release.type),
|
||||
out_path=os.path.join(dest_path, CLICKHOUSE_COMMON_STATIC_PACKET_NAME.format(version=release.version)),
|
||||
CLICKHOUSE_COMMON_STATIC_DOWNLOAD_URL.format(
|
||||
version=release.version, type=release.type
|
||||
),
|
||||
out_path=os.path.join(
|
||||
dest_path,
|
||||
CLICKHOUSE_COMMON_STATIC_PACKET_NAME.format(version=release.version),
|
||||
),
|
||||
)
|
||||
|
||||
download_packet(
|
||||
CLICKHOUSE_COMMON_STATIC_DBG_DOWNLOAD_URL.format(version=release.version, type=release.type),
|
||||
out_path=os.path.join(dest_path, CLICKHOUSE_COMMON_STATIC_DBG_PACKET_NAME.format(version=release.version)),
|
||||
CLICKHOUSE_COMMON_STATIC_DBG_DOWNLOAD_URL.format(
|
||||
version=release.version, type=release.type
|
||||
),
|
||||
out_path=os.path.join(
|
||||
dest_path,
|
||||
CLICKHOUSE_COMMON_STATIC_DBG_PACKET_NAME.format(version=release.version),
|
||||
),
|
||||
)
|
||||
|
||||
download_packet(
|
||||
CLICKHOUSE_SERVER_DOWNLOAD_URL.format(version=release.version, type=release.type),
|
||||
out_path=os.path.join(dest_path, CLICKHOUSE_SERVER_PACKET_NAME.format(version=release.version)),
|
||||
CLICKHOUSE_SERVER_DOWNLOAD_URL.format(
|
||||
version=release.version, type=release.type
|
||||
),
|
||||
out_path=os.path.join(
|
||||
dest_path, CLICKHOUSE_SERVER_PACKET_NAME.format(version=release.version)
|
||||
),
|
||||
)
|
||||
|
||||
download_packet(
|
||||
CLICKHOUSE_CLIENT_DOWNLOAD_URL.format(version=release.version, type=release.type),
|
||||
out_path=os.path.join(dest_path, CLICKHOUSE_CLIENT_PACKET_NAME.format(version=release.version)),
|
||||
CLICKHOUSE_CLIENT_DOWNLOAD_URL.format(
|
||||
version=release.version, type=release.type
|
||||
),
|
||||
out_path=os.path.join(
|
||||
dest_path, CLICKHOUSE_CLIENT_PACKET_NAME.format(version=release.version)
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@ -123,7 +158,7 @@ def download_previous_release(dest_path):
|
||||
download_packets(current_release, dest_path=dest_path)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
server_version = Version(input())
|
||||
previous_release = get_previous_release(server_version)
|
||||
|
@ -7,7 +7,7 @@ from pr_info import PRInfo
|
||||
from get_robot_token import get_best_robot_token
|
||||
from commit_status_helper import get_commit
|
||||
|
||||
NAME = 'Run Check (actions)'
|
||||
NAME = "Run Check (actions)"
|
||||
|
||||
|
||||
def filter_statuses(statuses):
|
||||
@ -36,4 +36,9 @@ if __name__ == "__main__":
|
||||
url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}"
|
||||
statuses = filter_statuses(list(commit.get_statuses()))
|
||||
if NAME in statuses and statuses[NAME].state == "pending":
|
||||
commit.create_status(context=NAME, description="All checks finished", state="success", target_url=url)
|
||||
commit.create_status(
|
||||
context=NAME,
|
||||
description="All checks finished",
|
||||
state="success",
|
||||
target_url=url,
|
||||
)
|
||||
|
@ -17,26 +17,35 @@ from build_download_helper import download_all_deb_packages
|
||||
from download_previous_release import download_previous_release
|
||||
from upload_result_helper import upload_results
|
||||
from docker_pull_helper import get_image_with_version
|
||||
from commit_status_helper import post_commit_status, get_commit, override_status, post_commit_status_to_file
|
||||
from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse
|
||||
from commit_status_helper import (
|
||||
post_commit_status,
|
||||
get_commit,
|
||||
override_status,
|
||||
post_commit_status_to_file,
|
||||
)
|
||||
from clickhouse_helper import (
|
||||
ClickHouseHelper,
|
||||
mark_flaky_tests,
|
||||
prepare_tests_results_for_clickhouse,
|
||||
)
|
||||
from stopwatch import Stopwatch
|
||||
from rerun_helper import RerunHelper
|
||||
from tee_popen import TeePopen
|
||||
|
||||
NO_CHANGES_MSG = 'Nothing to run'
|
||||
NO_CHANGES_MSG = "Nothing to run"
|
||||
|
||||
|
||||
def get_additional_envs(check_name, run_by_hash_num, run_by_hash_total):
|
||||
result = []
|
||||
if 'DatabaseReplicated' in check_name:
|
||||
if "DatabaseReplicated" in check_name:
|
||||
result.append("USE_DATABASE_REPLICATED=1")
|
||||
if 'DatabaseOrdinary' in check_name:
|
||||
if "DatabaseOrdinary" in check_name:
|
||||
result.append("USE_DATABASE_ORDINARY=1")
|
||||
if 'wide parts enabled' in check_name:
|
||||
if "wide parts enabled" in check_name:
|
||||
result.append("USE_POLYMORPHIC_PARTS=1")
|
||||
|
||||
#temporary
|
||||
if 's3 storage' in check_name:
|
||||
# temporary
|
||||
if "s3 storage" in check_name:
|
||||
result.append("USE_S3_STORAGE_FOR_MERGE_TREE=1")
|
||||
|
||||
if run_by_hash_total != 0:
|
||||
@ -45,37 +54,55 @@ def get_additional_envs(check_name, run_by_hash_num, run_by_hash_total):
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def get_image_name(check_name):
|
||||
if 'stateless' in check_name.lower():
|
||||
return 'clickhouse/stateless-test'
|
||||
if 'stateful' in check_name.lower():
|
||||
return 'clickhouse/stateful-test'
|
||||
if "stateless" in check_name.lower():
|
||||
return "clickhouse/stateless-test"
|
||||
if "stateful" in check_name.lower():
|
||||
return "clickhouse/stateful-test"
|
||||
else:
|
||||
raise Exception(f"Cannot deduce image name based on check name {check_name}")
|
||||
|
||||
|
||||
def get_run_command(builds_path, repo_tests_path, result_path, server_log_path, kill_timeout, additional_envs, image, flaky_check, tests_to_run):
|
||||
additional_options = ['--hung-check']
|
||||
additional_options.append('--print-time')
|
||||
def get_run_command(
|
||||
builds_path,
|
||||
repo_tests_path,
|
||||
result_path,
|
||||
server_log_path,
|
||||
kill_timeout,
|
||||
additional_envs,
|
||||
image,
|
||||
flaky_check,
|
||||
tests_to_run,
|
||||
):
|
||||
additional_options = ["--hung-check"]
|
||||
additional_options.append("--print-time")
|
||||
|
||||
if tests_to_run:
|
||||
additional_options += tests_to_run
|
||||
|
||||
additional_options_str = '-e ADDITIONAL_OPTIONS="' + ' '.join(additional_options) + '"'
|
||||
additional_options_str = (
|
||||
'-e ADDITIONAL_OPTIONS="' + " ".join(additional_options) + '"'
|
||||
)
|
||||
|
||||
envs = [f'-e MAX_RUN_TIME={int(0.9 * kill_timeout)}', '-e S3_URL="https://clickhouse-datasets.s3.amazonaws.com"']
|
||||
envs = [
|
||||
f"-e MAX_RUN_TIME={int(0.9 * kill_timeout)}",
|
||||
'-e S3_URL="https://clickhouse-datasets.s3.amazonaws.com"',
|
||||
]
|
||||
|
||||
if flaky_check:
|
||||
envs += ['-e NUM_TRIES=100', '-e MAX_RUN_TIME=1800']
|
||||
envs += ["-e NUM_TRIES=100", "-e MAX_RUN_TIME=1800"]
|
||||
|
||||
envs += [f'-e {e}' for e in additional_envs]
|
||||
envs += [f"-e {e}" for e in additional_envs]
|
||||
|
||||
env_str = ' '.join(envs)
|
||||
env_str = " ".join(envs)
|
||||
|
||||
return f"docker run --volume={builds_path}:/package_folder " \
|
||||
f"--volume={repo_tests_path}:/usr/share/clickhouse-test " \
|
||||
f"--volume={result_path}:/test_output --volume={server_log_path}:/var/log/clickhouse-server " \
|
||||
return (
|
||||
f"docker run --volume={builds_path}:/package_folder "
|
||||
f"--volume={repo_tests_path}:/usr/share/clickhouse-test "
|
||||
f"--volume={result_path}:/test_output --volume={server_log_path}:/var/log/clickhouse-server "
|
||||
f"--cap-add=SYS_PTRACE {env_str} {additional_options_str} {image}"
|
||||
)
|
||||
|
||||
|
||||
def get_tests_to_run(pr_info):
|
||||
@ -85,32 +112,43 @@ def get_tests_to_run(pr_info):
|
||||
return []
|
||||
|
||||
for fpath in pr_info.changed_files:
|
||||
if 'tests/queries/0_stateless/0' in fpath:
|
||||
logging.info('File %s changed and seems like a stateless test', fpath)
|
||||
fname = fpath.split('/')[3]
|
||||
if "tests/queries/0_stateless/0" in fpath:
|
||||
logging.info("File %s changed and seems like stateless test", fpath)
|
||||
fname = fpath.split("/")[3]
|
||||
fname_without_ext = os.path.splitext(fname)[0]
|
||||
result.add(fname_without_ext + '.')
|
||||
result.add(fname_without_ext + ".")
|
||||
return list(result)
|
||||
|
||||
|
||||
def process_results(result_folder, server_log_path):
|
||||
test_results = []
|
||||
additional_files = []
|
||||
# Just upload all files from result_folder.
|
||||
# If task provides processed results, then it's responsible for content of result_folder.
|
||||
if os.path.exists(result_folder):
|
||||
test_files = [f for f in os.listdir(result_folder) if os.path.isfile(os.path.join(result_folder, f))]
|
||||
test_files = [
|
||||
f
|
||||
for f in os.listdir(result_folder)
|
||||
if os.path.isfile(os.path.join(result_folder, f))
|
||||
]
|
||||
additional_files = [os.path.join(result_folder, f) for f in test_files]
|
||||
|
||||
if os.path.exists(server_log_path):
|
||||
server_log_files = [f for f in os.listdir(server_log_path) if os.path.isfile(os.path.join(server_log_path, f))]
|
||||
additional_files = additional_files + [os.path.join(server_log_path, f) for f in server_log_files]
|
||||
server_log_files = [
|
||||
f
|
||||
for f in os.listdir(server_log_path)
|
||||
if os.path.isfile(os.path.join(server_log_path, f))
|
||||
]
|
||||
additional_files = additional_files + [
|
||||
os.path.join(server_log_path, f) for f in server_log_files
|
||||
]
|
||||
|
||||
status = []
|
||||
status_path = os.path.join(result_folder, "check_status.tsv")
|
||||
if os.path.exists(status_path):
|
||||
logging.info("Found test_results.tsv")
|
||||
with open(status_path, 'r', encoding='utf-8') as status_file:
|
||||
status = list(csv.reader(status_file, delimiter='\t'))
|
||||
with open(status_path, "r", encoding="utf-8") as status_file:
|
||||
status = list(csv.reader(status_file, delimiter="\t"))
|
||||
|
||||
if len(status) != 1 or len(status[0]) != 2:
|
||||
logging.info("Files in result folder %s", os.listdir(result_folder))
|
||||
@ -125,8 +163,8 @@ def process_results(result_folder, server_log_path):
logging.info("Files in result folder %s", os.listdir(result_folder))
return "error", "Not found test_results.tsv", test_results, additional_files

with open(results_path, 'r', encoding='utf-8') as results_file:
test_results = list(csv.reader(results_file, delimiter='\t'))
with open(results_path, "r", encoding="utf-8") as results_file:
test_results = list(csv.reader(results_file, delimiter="\t"))
if len(test_results) == 0:
return "error", "Empty test_results.tsv", test_results, additional_files

@ -137,8 +175,17 @@ def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument("check_name")
parser.add_argument("kill_timeout", type=int)
parser.add_argument("--validate-bugfix", action='store_true', help="Check that added tests failed on latest stable")
parser.add_argument("--post-commit-status", default='commit_status', choices=['commit_status', 'file'], help="Where to public post commit status")
parser.add_argument(
"--validate-bugfix",
action="store_true",
help="Check that added tests failed on latest stable",
)
parser.add_argument(
"--post-commit-status",
default="commit_status",
choices=["commit_status", "file"],
help="Where to public post commit status",
)
return parser.parse_args()


@ -156,7 +203,7 @@ if __name__ == "__main__":
kill_timeout = args.kill_timeout
validate_bugix_check = args.validate_bugfix

flaky_check = 'flaky' in check_name.lower()
flaky_check = "flaky" in check_name.lower()

run_changed_tests = flaky_check or validate_bugix_check
gh = Github(get_best_robot_token())
@ -166,16 +213,23 @@ if __name__ == "__main__":
if not os.path.exists(temp_path):
os.makedirs(temp_path)

if validate_bugix_check and 'pr-bugfix' not in pr_info.labels:
if args.post_commit_status == 'file':
post_commit_status_to_file(os.path.join(temp_path, "post_commit_status.tsv"), 'Skipped (no pr-bugfix)', 'success', 'null')
if validate_bugix_check and "pr-bugfix" not in pr_info.labels:
if args.post_commit_status == "file":
post_commit_status_to_file(
os.path.join(temp_path, "post_commit_status.tsv"),
"Skipped (no pr-bugfix)",
"success",
"null",
)
logging.info("Skipping '%s' (no pr-bugfix)", check_name)
sys.exit(0)

if 'RUN_BY_HASH_NUM' in os.environ:
run_by_hash_num = int(os.getenv('RUN_BY_HASH_NUM'))
run_by_hash_total = int(os.getenv('RUN_BY_HASH_TOTAL'))
check_name_with_group = check_name + f' [{run_by_hash_num + 1}/{run_by_hash_total}]'
if "RUN_BY_HASH_NUM" in os.environ:
run_by_hash_num = int(os.getenv("RUN_BY_HASH_NUM"))
run_by_hash_total = int(os.getenv("RUN_BY_HASH_TOTAL"))
check_name_with_group = (
check_name + f" [{run_by_hash_num + 1}/{run_by_hash_total}]"
)
else:
run_by_hash_num = 0
run_by_hash_total = 0
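RUN_BY_HASH_NUM and RUN_BY_HASH_TOTAL split one check into several parallel shards; the runner that consumes them is not part of this diff, so the following is only a hypothetical sketch of the convention (shard numbers are 0-based in the environment and shown 1-based in the report name above):

import os
import zlib

def belongs_to_this_shard(test_name: str) -> bool:
    # Hypothetical consumer: keep only tests whose (stable) hash lands in
    # this runner's bucket; with RUN_BY_HASH_TOTAL unset, every test is kept.
    num = int(os.getenv("RUN_BY_HASH_NUM", "0"))
    total = int(os.getenv("RUN_BY_HASH_TOTAL", "0"))
    if total == 0:
        return True
    return zlib.crc32(test_name.encode()) % total == num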
@ -191,12 +245,18 @@ if __name__ == "__main__":
tests_to_run = get_tests_to_run(pr_info)
if not tests_to_run:
commit = get_commit(gh, pr_info.sha)
state = override_status('success', check_name, validate_bugix_check)
if args.post_commit_status == 'commit_status':
commit.create_status(context=check_name_with_group, description=NO_CHANGES_MSG, state=state)
elif args.post_commit_status == 'file':
state = override_status("success", check_name, validate_bugix_check)
if args.post_commit_status == "commit_status":
commit.create_status(
context=check_name_with_group,
description=NO_CHANGES_MSG,
state=state,
)
elif args.post_commit_status == "file":
fpath = os.path.join(temp_path, "post_commit_status.tsv")
post_commit_status_to_file(fpath, description=NO_CHANGES_MSG, state=state, report_url='null')
post_commit_status_to_file(
fpath, description=NO_CHANGES_MSG, state=state, report_url="null"
)
sys.exit(0)

image_name = get_image_name(check_name)
@ -223,11 +283,23 @@ if __name__ == "__main__":

run_log_path = os.path.join(result_path, "runlog.log")

additional_envs = get_additional_envs(check_name, run_by_hash_num, run_by_hash_total)
additional_envs = get_additional_envs(
check_name, run_by_hash_num, run_by_hash_total
)
if validate_bugix_check:
additional_envs.append('GLOBAL_TAGS=no-random-settings')
additional_envs.append("GLOBAL_TAGS=no-random-settings")

run_command = get_run_command(packages_path, repo_tests_path, result_path, server_log_path, kill_timeout, additional_envs, docker_image, flaky_check, tests_to_run)
run_command = get_run_command(
packages_path,
repo_tests_path,
result_path,
server_log_path,
kill_timeout,
additional_envs,
docker_image,
flaky_check,
tests_to_run,
)
logging.info("Going to run func tests: %s", run_command)

with TeePopen(run_command, run_log_path) as process:
@ -239,29 +311,55 @@ if __name__ == "__main__":

subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)

s3_helper = S3Helper('https://s3.amazonaws.com')
s3_helper = S3Helper("https://s3.amazonaws.com")

state, description, test_results, additional_logs = process_results(result_path, server_log_path)
state, description, test_results, additional_logs = process_results(
result_path, server_log_path
)
state = override_status(state, check_name, validate_bugix_check)

ch_helper = ClickHouseHelper()
mark_flaky_tests(ch_helper, check_name, test_results)

report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [run_log_path] + additional_logs, check_name_with_group)
report_url = upload_results(
s3_helper,
pr_info.number,
pr_info.sha,
test_results,
[run_log_path] + additional_logs,
check_name_with_group,
)

print(f"::notice:: {check_name} Report url: {report_url}")
if args.post_commit_status == 'commit_status':
post_commit_status(gh, pr_info.sha, check_name_with_group, description, state, report_url)
elif args.post_commit_status == 'file':
post_commit_status_to_file(os.path.join(temp_path, "post_commit_status.tsv"), description, state, report_url)
if args.post_commit_status == "commit_status":
post_commit_status(
gh, pr_info.sha, check_name_with_group, description, state, report_url
)
elif args.post_commit_status == "file":
post_commit_status_to_file(
os.path.join(temp_path, "post_commit_status.tsv"),
description,
state,
report_url,
)
else:
raise Exception(f'Unknown post_commit_status option "{args.post_commit_status}"')
raise Exception(
f'Unknown post_commit_status option "{args.post_commit_status}"'
)

prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name_with_group)
prepared_events = prepare_tests_results_for_clickhouse(
pr_info,
test_results,
state,
stopwatch.duration_seconds,
stopwatch.start_time_str,
report_url,
check_name_with_group,
)
ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)

if state != 'success':
if 'force-tests' in pr_info.labels:
if state != "success":
if "force-tests" in pr_info.labels:
print("'force-tests' enabled, will report success")
else:
sys.exit(1)

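A sketch of how the formatting gate behind this pull request can be reproduced locally, written in the same subprocess style the CI scripts above use (the target path is illustrative; `--check` makes black exit non-zero instead of rewriting files):

import subprocess

# Fails with a non-zero exit code if black would reformat anything under the path.
subprocess.check_call("python3 -m black --check --diff tests/ci", shell=True)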
@ -2,13 +2,15 @@
import boto3 # type: ignore
from github import Github # type: ignore


def get_parameter_from_ssm(name, decrypt=True, client=None):
if not client:
client = boto3.client('ssm', region_name='us-east-1')
return client.get_parameter(Name=name, WithDecryption=decrypt)['Parameter']['Value']
client = boto3.client("ssm", region_name="us-east-1")
return client.get_parameter(Name=name, WithDecryption=decrypt)["Parameter"]["Value"]


def get_best_robot_token(token_prefix_env_name="github_robot_token_", total_tokens=4):
client = boto3.client('ssm', region_name='us-east-1')
client = boto3.client("ssm", region_name="us-east-1")
tokens = {}
for i in range(1, total_tokens + 1):
token_name = token_prefix_env_name + str(i)

@ -18,8 +18,16 @@ from build_download_helper import download_all_deb_packages
|
||||
from download_previous_release import download_previous_release
|
||||
from upload_result_helper import upload_results
|
||||
from docker_pull_helper import get_images_with_versions
|
||||
from commit_status_helper import post_commit_status, override_status, post_commit_status_to_file
|
||||
from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse
|
||||
from commit_status_helper import (
|
||||
post_commit_status,
|
||||
override_status,
|
||||
post_commit_status_to_file,
|
||||
)
|
||||
from clickhouse_helper import (
|
||||
ClickHouseHelper,
|
||||
mark_flaky_tests,
|
||||
prepare_tests_results_for_clickhouse,
|
||||
)
|
||||
from stopwatch import Stopwatch
|
||||
from rerun_helper import RerunHelper
|
||||
from tee_popen import TeePopen
|
||||
@ -41,24 +49,28 @@ IMAGES = [
|
||||
"clickhouse/dotnet-client",
|
||||
]
|
||||
|
||||
def get_json_params_dict(check_name, pr_info, docker_images, run_by_hash_total, run_by_hash_num):
|
||||
|
||||
def get_json_params_dict(
|
||||
check_name, pr_info, docker_images, run_by_hash_total, run_by_hash_num
|
||||
):
|
||||
return {
|
||||
'context_name': check_name,
|
||||
'commit': pr_info.sha,
|
||||
'pull_request': pr_info.number,
|
||||
'pr_info': {'changed_files' : list(pr_info.changed_files)},
|
||||
'docker_images_with_versions': docker_images,
|
||||
'shuffle_test_groups': False,
|
||||
'use_tmpfs': False,
|
||||
'disable_net_host': True,
|
||||
'run_by_hash_total': run_by_hash_total,
|
||||
'run_by_hash_num': run_by_hash_num,
|
||||
"context_name": check_name,
|
||||
"commit": pr_info.sha,
|
||||
"pull_request": pr_info.number,
|
||||
"pr_info": {"changed_files": list(pr_info.changed_files)},
|
||||
"docker_images_with_versions": docker_images,
|
||||
"shuffle_test_groups": False,
|
||||
"use_tmpfs": False,
|
||||
"disable_net_host": True,
|
||||
"run_by_hash_total": run_by_hash_total,
|
||||
"run_by_hash_num": run_by_hash_num,
|
||||
}
|
||||
|
||||
|
||||
def get_env_for_runner(build_path, repo_path, result_path, work_path):
|
||||
binary_path = os.path.join(build_path, 'clickhouse')
|
||||
odbc_bridge_path = os.path.join(build_path, 'clickhouse-odbc-bridge')
|
||||
library_bridge_path = os.path.join(build_path, 'clickhouse-library-bridge')
|
||||
binary_path = os.path.join(build_path, "clickhouse")
|
||||
odbc_bridge_path = os.path.join(build_path, "clickhouse-odbc-bridge")
|
||||
library_bridge_path = os.path.join(build_path, "clickhouse-library-bridge")
|
||||
|
||||
my_env = os.environ.copy()
|
||||
my_env["CLICKHOUSE_TESTS_BUILD_PATH"] = build_path
|
||||
@ -70,25 +82,30 @@ def get_env_for_runner(build_path, repo_path, result_path, work_path):
|
||||
my_env["CLICKHOUSE_TESTS_RESULT_PATH"] = result_path
|
||||
my_env["CLICKHOUSE_TESTS_BASE_CONFIG_DIR"] = f"{repo_path}/programs/server"
|
||||
my_env["CLICKHOUSE_TESTS_JSON_PARAMS_PATH"] = os.path.join(work_path, "params.json")
|
||||
my_env["CLICKHOUSE_TESTS_RUNNER_RESTART_DOCKER"] = '0'
|
||||
my_env["CLICKHOUSE_TESTS_RUNNER_RESTART_DOCKER"] = "0"
|
||||
|
||||
return my_env
|
||||
|
||||
|
||||
def process_results(result_folder):
|
||||
test_results = []
|
||||
additional_files = []
|
||||
# Just upload all files from result_folder.
|
||||
# If task provides processed results, then it's responsible for content of result_folder.
|
||||
if os.path.exists(result_folder):
|
||||
test_files = [f for f in os.listdir(result_folder) if os.path.isfile(os.path.join(result_folder, f))]
|
||||
test_files = [
|
||||
f
|
||||
for f in os.listdir(result_folder)
|
||||
if os.path.isfile(os.path.join(result_folder, f))
|
||||
]
|
||||
additional_files = [os.path.join(result_folder, f) for f in test_files]
|
||||
|
||||
status = []
|
||||
status_path = os.path.join(result_folder, "check_status.tsv")
|
||||
if os.path.exists(status_path):
|
||||
logging.info("Found test_results.tsv")
|
||||
with open(status_path, 'r', encoding='utf-8') as status_file:
|
||||
status = list(csv.reader(status_file, delimiter='\t'))
|
||||
with open(status_path, "r", encoding="utf-8") as status_file:
|
||||
status = list(csv.reader(status_file, delimiter="\t"))
|
||||
|
||||
if len(status) != 1 or len(status[0]) != 2:
|
||||
logging.info("Files in result folder %s", os.listdir(result_folder))
|
||||
@ -97,8 +114,8 @@ def process_results(result_folder):
|
||||
|
||||
results_path = os.path.join(result_folder, "test_results.tsv")
|
||||
if os.path.exists(results_path):
|
||||
with open(results_path, 'r', encoding='utf-8') as results_file:
|
||||
test_results = list(csv.reader(results_file, delimiter='\t'))
|
||||
with open(results_path, "r", encoding="utf-8") as results_file:
|
||||
test_results = list(csv.reader(results_file, delimiter="\t"))
|
||||
if len(test_results) == 0:
|
||||
return "error", "Empty test_results.tsv", test_results, additional_files
|
||||
|
||||
@ -108,8 +125,17 @@ def process_results(result_folder):
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("check_name")
|
||||
parser.add_argument("--validate-bugfix", action='store_true', help="Check that added tests failed on latest stable")
|
||||
parser.add_argument("--post-commit-status", default='commit_status', choices=['commit_status', 'file'], help="Where to public post commit status")
|
||||
parser.add_argument(
|
||||
"--validate-bugfix",
|
||||
action="store_true",
|
||||
help="Check that added tests failed on latest stable",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--post-commit-status",
|
||||
default="commit_status",
|
||||
choices=["commit_status", "file"],
|
||||
help="Where to public post commit status",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
@ -126,10 +152,12 @@ if __name__ == "__main__":
|
||||
check_name = args.check_name
|
||||
validate_bugix_check = args.validate_bugfix
|
||||
|
||||
if 'RUN_BY_HASH_NUM' in os.environ:
|
||||
run_by_hash_num = int(os.getenv('RUN_BY_HASH_NUM'))
|
||||
run_by_hash_total = int(os.getenv('RUN_BY_HASH_TOTAL'))
|
||||
check_name_with_group = check_name + f' [{run_by_hash_num + 1}/{run_by_hash_total}]'
|
||||
if "RUN_BY_HASH_NUM" in os.environ:
|
||||
run_by_hash_num = int(os.getenv("RUN_BY_HASH_NUM"))
|
||||
run_by_hash_total = int(os.getenv("RUN_BY_HASH_TOTAL"))
|
||||
check_name_with_group = (
|
||||
check_name + f" [{run_by_hash_num + 1}/{run_by_hash_total}]"
|
||||
)
|
||||
else:
|
||||
run_by_hash_num = 0
|
||||
run_by_hash_total = 0
|
||||
@ -138,12 +166,17 @@ if __name__ == "__main__":
|
||||
if not os.path.exists(temp_path):
|
||||
os.makedirs(temp_path)
|
||||
|
||||
is_flaky_check = 'flaky' in check_name
|
||||
is_flaky_check = "flaky" in check_name
|
||||
pr_info = PRInfo(need_changed_files=is_flaky_check or validate_bugix_check)
|
||||
|
||||
if validate_bugix_check and 'pr-bugfix' not in pr_info.labels:
|
||||
if args.post_commit_status == 'file':
|
||||
post_commit_status_to_file(os.path.join(temp_path, "post_commit_status.tsv"), 'Skipped (no pr-bugfix)', 'success', 'null')
|
||||
if validate_bugix_check and "pr-bugfix" not in pr_info.labels:
|
||||
if args.post_commit_status == "file":
|
||||
post_commit_status_to_file(
|
||||
os.path.join(temp_path, "post_commit_status.tsv"),
|
||||
"Skipped (no pr-bugfix)",
|
||||
"success",
|
||||
"null",
|
||||
)
|
||||
logging.info("Skipping '%s' (no pr-bugfix)", check_name)
|
||||
sys.exit(0)
|
||||
|
||||
@ -175,9 +208,19 @@ if __name__ == "__main__":
|
||||
|
||||
my_env = get_env_for_runner(build_path, repo_path, result_path, work_path)
|
||||
|
||||
json_path = os.path.join(work_path, 'params.json')
|
||||
with open(json_path, 'w', encoding='utf-8') as json_params:
|
||||
json_params.write(json.dumps(get_json_params_dict(check_name, pr_info, images_with_versions, run_by_hash_total, run_by_hash_num)))
|
||||
json_path = os.path.join(work_path, "params.json")
|
||||
with open(json_path, "w", encoding="utf-8") as json_params:
|
||||
json_params.write(
|
||||
json.dumps(
|
||||
get_json_params_dict(
|
||||
check_name,
|
||||
pr_info,
|
||||
images_with_versions,
|
||||
run_by_hash_total,
|
||||
run_by_hash_num,
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
output_path_log = os.path.join(result_path, "main_script_log.txt")
|
||||
|
||||
@ -199,16 +242,41 @@ if __name__ == "__main__":
|
||||
ch_helper = ClickHouseHelper()
|
||||
mark_flaky_tests(ch_helper, check_name, test_results)
|
||||
|
||||
s3_helper = S3Helper('https://s3.amazonaws.com')
|
||||
report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [output_path_log] + additional_logs, check_name_with_group, False)
|
||||
s3_helper = S3Helper("https://s3.amazonaws.com")
|
||||
report_url = upload_results(
|
||||
s3_helper,
|
||||
pr_info.number,
|
||||
pr_info.sha,
|
||||
test_results,
|
||||
[output_path_log] + additional_logs,
|
||||
check_name_with_group,
|
||||
False,
|
||||
)
|
||||
|
||||
print(f"::notice:: {check_name} Report url: {report_url}")
|
||||
if args.post_commit_status == 'commit_status':
|
||||
post_commit_status(gh, pr_info.sha, check_name_with_group, description, state, report_url)
|
||||
elif args.post_commit_status == 'file':
|
||||
post_commit_status_to_file(os.path.join(temp_path, "post_commit_status.tsv"), description, state, report_url)
|
||||
if args.post_commit_status == "commit_status":
|
||||
post_commit_status(
|
||||
gh, pr_info.sha, check_name_with_group, description, state, report_url
|
||||
)
|
||||
elif args.post_commit_status == "file":
|
||||
post_commit_status_to_file(
|
||||
os.path.join(temp_path, "post_commit_status.tsv"),
|
||||
description,
|
||||
state,
|
||||
report_url,
|
||||
)
|
||||
else:
|
||||
raise Exception(f'Unknown post_commit_status option "{args.post_commit_status}"')
|
||||
raise Exception(
|
||||
f'Unknown post_commit_status option "{args.post_commit_status}"'
|
||||
)
|
||||
|
||||
prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name_with_group)
|
||||
prepared_events = prepare_tests_results_for_clickhouse(
|
||||
pr_info,
|
||||
test_results,
|
||||
state,
|
||||
stopwatch.duration_seconds,
|
||||
stopwatch.start_time_str,
|
||||
report_url,
|
||||
check_name_with_group,
|
||||
)
|
||||
ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
|
||||
|
@ -24,10 +24,10 @@ from ssh import SSHKey
|
||||
from build_download_helper import get_build_name_for_check
|
||||
from rerun_helper import RerunHelper
|
||||
|
||||
JEPSEN_GROUP_NAME = 'jepsen_group'
|
||||
JEPSEN_GROUP_NAME = "jepsen_group"
|
||||
DESIRED_INSTANCE_COUNT = 3
|
||||
IMAGE_NAME = 'clickhouse/keeper-jepsen-test'
|
||||
CHECK_NAME = 'ClickHouse Keeper Jepsen (actions)'
|
||||
IMAGE_NAME = "clickhouse/keeper-jepsen-test"
|
||||
CHECK_NAME = "ClickHouse Keeper Jepsen (actions)"
|
||||
|
||||
|
||||
SUCCESSFUL_TESTS_ANCHOR = "# Successful tests"
|
||||
@ -35,45 +35,58 @@ INTERMINATE_TESTS_ANCHOR = "# Indeterminate tests"
|
||||
CRASHED_TESTS_ANCHOR = "# Crashed tests"
|
||||
FAILED_TESTS_ANCHOR = "# Failed tests"
|
||||
|
||||
|
||||
def _parse_jepsen_output(path):
|
||||
test_results = []
|
||||
current_type = ''
|
||||
with open(path, 'r') as f:
|
||||
current_type = ""
|
||||
with open(path, "r") as f:
|
||||
for line in f:
|
||||
if SUCCESSFUL_TESTS_ANCHOR in line:
|
||||
current_type = 'OK'
|
||||
current_type = "OK"
|
||||
elif INTERMINATE_TESTS_ANCHOR in line or CRASHED_TESTS_ANCHOR in line:
|
||||
current_type = 'ERROR'
|
||||
current_type = "ERROR"
|
||||
elif FAILED_TESTS_ANCHOR in line:
|
||||
current_type = 'FAIL'
|
||||
current_type = "FAIL"
|
||||
|
||||
if (line.startswith('store/clickhouse-keeper') or line.startswith('clickhouse-keeper')) and current_type:
|
||||
if (
|
||||
line.startswith("store/clickhouse-keeper")
|
||||
or line.startswith("clickhouse-keeper")
|
||||
) and current_type:
|
||||
test_results.append((line.strip(), current_type))
|
||||
|
||||
return test_results
|
||||
|
||||
|
||||
def get_autoscaling_group_instances_ids(asg_client, group_name):
|
||||
group_description = asg_client.describe_auto_scaling_groups(AutoScalingGroupNames=[group_name])
|
||||
our_group = group_description['AutoScalingGroups'][0]
|
||||
group_description = asg_client.describe_auto_scaling_groups(
|
||||
AutoScalingGroupNames=[group_name]
|
||||
)
|
||||
our_group = group_description["AutoScalingGroups"][0]
|
||||
instance_ids = []
|
||||
for instance in our_group['Instances']:
|
||||
if instance['LifecycleState'] == 'InService' and instance['HealthStatus'] == 'Healthy':
|
||||
instance_ids.append(instance['InstanceId'])
|
||||
for instance in our_group["Instances"]:
|
||||
if (
|
||||
instance["LifecycleState"] == "InService"
|
||||
and instance["HealthStatus"] == "Healthy"
|
||||
):
|
||||
instance_ids.append(instance["InstanceId"])
|
||||
|
||||
return instance_ids
|
||||
|
||||
|
||||
def get_instances_addresses(ec2_client, instance_ids):
|
||||
ec2_response = ec2_client.describe_instances(InstanceIds = instance_ids)
|
||||
ec2_response = ec2_client.describe_instances(InstanceIds=instance_ids)
|
||||
instance_ips = []
|
||||
for instances in ec2_response['Reservations']:
|
||||
for ip in instances['Instances']:
|
||||
instance_ips.append(ip['PrivateIpAddress'])
|
||||
for instances in ec2_response["Reservations"]:
|
||||
for ip in instances["Instances"]:
|
||||
instance_ips.append(ip["PrivateIpAddress"])
|
||||
return instance_ips
|
||||
|
||||
|
||||
def prepare_autoscaling_group_and_get_hostnames():
|
||||
asg_client = boto3.client('autoscaling', region_name='us-east-1')
|
||||
asg_client.set_desired_capacity(AutoScalingGroupName=JEPSEN_GROUP_NAME, DesiredCapacity=DESIRED_INSTANCE_COUNT)
|
||||
asg_client = boto3.client("autoscaling", region_name="us-east-1")
|
||||
asg_client.set_desired_capacity(
|
||||
AutoScalingGroupName=JEPSEN_GROUP_NAME, DesiredCapacity=DESIRED_INSTANCE_COUNT
|
||||
)
|
||||
|
||||
instances = get_autoscaling_group_instances_ids(asg_client, JEPSEN_GROUP_NAME)
|
||||
counter = 0
|
||||
@ -84,13 +97,15 @@ def prepare_autoscaling_group_and_get_hostnames():
|
||||
if counter > 30:
|
||||
raise Exception("Cannot wait autoscaling group")
|
||||
|
||||
ec2_client = boto3.client('ec2', region_name='us-east-1')
|
||||
ec2_client = boto3.client("ec2", region_name="us-east-1")
|
||||
return get_instances_addresses(ec2_client, instances)
|
||||
|
||||
|
||||
def clear_autoscaling_group():
|
||||
asg_client = boto3.client('autoscaling', region_name='us-east-1')
|
||||
asg_client.set_desired_capacity(AutoScalingGroupName=JEPSEN_GROUP_NAME, DesiredCapacity=0)
|
||||
asg_client = boto3.client("autoscaling", region_name="us-east-1")
|
||||
asg_client.set_desired_capacity(
|
||||
AutoScalingGroupName=JEPSEN_GROUP_NAME, DesiredCapacity=0
|
||||
)
|
||||
instances = get_autoscaling_group_instances_ids(asg_client, JEPSEN_GROUP_NAME)
|
||||
counter = 0
|
||||
while len(instances) > 0:
|
||||
@ -103,15 +118,28 @@ def clear_autoscaling_group():
|
||||
|
||||
def save_nodes_to_file(instances, temp_path):
|
||||
nodes_path = os.path.join(temp_path, "nodes.txt")
|
||||
with open(nodes_path, 'w') as f:
|
||||
with open(nodes_path, "w") as f:
|
||||
f.write("\n".join(instances))
|
||||
f.flush()
|
||||
return nodes_path
|
||||
|
||||
def get_run_command(ssh_auth_sock, ssh_sock_dir, pr_info, nodes_path, repo_path, build_url, result_path, docker_image):
|
||||
return f"docker run --network=host -v '{ssh_sock_dir}:{ssh_sock_dir}' -e SSH_AUTH_SOCK={ssh_auth_sock} " \
|
||||
f"-e PR_TO_TEST={pr_info.number} -e SHA_TO_TEST={pr_info.sha} -v '{nodes_path}:/nodes.txt' -v {result_path}:/test_output " \
|
||||
f"-e 'CLICKHOUSE_PACKAGE={build_url}' -v '{repo_path}:/ch' -e 'CLICKHOUSE_REPO_PATH=/ch' -e NODES_USERNAME=ubuntu {docker_image}"
|
||||
|
||||
def get_run_command(
|
||||
ssh_auth_sock,
|
||||
ssh_sock_dir,
|
||||
pr_info,
|
||||
nodes_path,
|
||||
repo_path,
|
||||
build_url,
|
||||
result_path,
|
||||
docker_image,
|
||||
):
|
||||
return (
|
||||
f"docker run --network=host -v '{ssh_sock_dir}:{ssh_sock_dir}' -e SSH_AUTH_SOCK={ssh_auth_sock} "
|
||||
f"-e PR_TO_TEST={pr_info.number} -e SHA_TO_TEST={pr_info.sha} -v '{nodes_path}:/nodes.txt' -v {result_path}:/test_output "
|
||||
f"-e 'CLICKHOUSE_PACKAGE={build_url}' -v '{repo_path}:/ch' -e 'CLICKHOUSE_REPO_PATH=/ch' -e NODES_USERNAME=ubuntu {docker_image}"
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
@ -120,9 +148,14 @@ if __name__ == "__main__":
|
||||
|
||||
pr_info = PRInfo()
|
||||
|
||||
logging.info("Start at PR number %s, commit sha %s labels %s", pr_info.number, pr_info.sha, pr_info.labels)
|
||||
logging.info(
|
||||
"Start at PR number %s, commit sha %s labels %s",
|
||||
pr_info.number,
|
||||
pr_info.sha,
|
||||
pr_info.labels,
|
||||
)
|
||||
|
||||
if pr_info.number != 0 and 'jepsen-test' not in pr_info.labels:
|
||||
if pr_info.number != 0 and "jepsen-test" not in pr_info.labels:
|
||||
logging.info("Not jepsen test label in labels list, skipping")
|
||||
sys.exit(0)
|
||||
|
||||
@ -167,13 +200,24 @@ if __name__ == "__main__":
|
||||
head = requests.head(build_url)
|
||||
counter += 1
|
||||
if counter >= 180:
|
||||
post_commit_status(gh, pr_info.sha, CHECK_NAME, "Cannot fetch build to run", "error", "")
|
||||
post_commit_status(
|
||||
gh, pr_info.sha, CHECK_NAME, "Cannot fetch build to run", "error", ""
|
||||
)
|
||||
raise Exception("Cannot fetch build")
|
||||
|
||||
with SSHKey(key_value=get_parameter_from_ssm("jepsen_ssh_key") + '\n'):
|
||||
ssh_auth_sock = os.environ['SSH_AUTH_SOCK']
|
||||
with SSHKey(key_value=get_parameter_from_ssm("jepsen_ssh_key") + "\n"):
|
||||
ssh_auth_sock = os.environ["SSH_AUTH_SOCK"]
|
||||
auth_sock_dir = os.path.dirname(ssh_auth_sock)
|
||||
cmd = get_run_command(ssh_auth_sock, auth_sock_dir, pr_info, nodes_path, REPO_COPY, build_url, result_path, docker_image)
|
||||
cmd = get_run_command(
|
||||
ssh_auth_sock,
|
||||
auth_sock_dir,
|
||||
pr_info,
|
||||
nodes_path,
|
||||
REPO_COPY,
|
||||
build_url,
|
||||
result_path,
|
||||
docker_image,
|
||||
)
|
||||
logging.info("Going to run jepsen: %s", cmd)
|
||||
|
||||
run_log_path = os.path.join(TEMP_PATH, "runlog.log")
|
||||
@ -185,31 +229,49 @@ if __name__ == "__main__":
|
||||
else:
|
||||
logging.info("Run failed")
|
||||
|
||||
status = 'success'
|
||||
description = 'No invalid analysis found ヽ(‘ー`)ノ'
|
||||
jepsen_log_path = os.path.join(result_path, 'jepsen_run_all_tests.log')
|
||||
status = "success"
|
||||
description = "No invalid analysis found ヽ(‘ー`)ノ"
|
||||
jepsen_log_path = os.path.join(result_path, "jepsen_run_all_tests.log")
|
||||
additional_data = []
|
||||
try:
|
||||
test_result = _parse_jepsen_output(jepsen_log_path)
|
||||
if any(r[1] == 'FAIL' for r in test_result):
|
||||
status = 'failure'
|
||||
description = 'Found invalid analysis (ノಥ益ಥ)ノ ┻━┻'
|
||||
if any(r[1] == "FAIL" for r in test_result):
|
||||
status = "failure"
|
||||
description = "Found invalid analysis (ノಥ益ಥ)ノ ┻━┻"
|
||||
|
||||
compress_fast(os.path.join(result_path, 'store'), os.path.join(result_path, 'jepsen_store.tar.gz'))
|
||||
additional_data.append(os.path.join(result_path, 'jepsen_store.tar.gz'))
|
||||
compress_fast(
|
||||
os.path.join(result_path, "store"),
|
||||
os.path.join(result_path, "jepsen_store.tar.gz"),
|
||||
)
|
||||
additional_data.append(os.path.join(result_path, "jepsen_store.tar.gz"))
|
||||
except Exception as ex:
|
||||
print("Exception", ex)
|
||||
status = 'failure'
|
||||
description = 'No Jepsen output log'
|
||||
test_result = [('No Jepsen output log', 'FAIL')]
|
||||
status = "failure"
|
||||
description = "No Jepsen output log"
|
||||
test_result = [("No Jepsen output log", "FAIL")]
|
||||
|
||||
s3_helper = S3Helper('https://s3.amazonaws.com')
|
||||
report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_result, [run_log_path] + additional_data, CHECK_NAME)
|
||||
s3_helper = S3Helper("https://s3.amazonaws.com")
|
||||
report_url = upload_results(
|
||||
s3_helper,
|
||||
pr_info.number,
|
||||
pr_info.sha,
|
||||
test_result,
|
||||
[run_log_path] + additional_data,
|
||||
CHECK_NAME,
|
||||
)
|
||||
|
||||
print(f"::notice ::Report url: {report_url}")
|
||||
post_commit_status(gh, pr_info.sha, CHECK_NAME, description, status, report_url)
|
||||
|
||||
ch_helper = ClickHouseHelper()
|
||||
prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_result, status, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, CHECK_NAME)
|
||||
prepared_events = prepare_tests_results_for_clickhouse(
|
||||
pr_info,
|
||||
test_result,
|
||||
status,
|
||||
stopwatch.duration_seconds,
|
||||
stopwatch.start_time_str,
|
||||
report_url,
|
||||
CHECK_NAME,
|
||||
)
|
||||
ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
|
||||
clear_autoscaling_group()
|
||||
|
@ -19,14 +19,26 @@ from commit_status_helper import get_commit, post_commit_status
|
||||
from tee_popen import TeePopen
|
||||
from rerun_helper import RerunHelper
|
||||
|
||||
IMAGE_NAME = 'clickhouse/performance-comparison'
|
||||
IMAGE_NAME = "clickhouse/performance-comparison"
|
||||
|
||||
def get_run_command(workspace, result_path, repo_tests_path, pr_to_test, sha_to_test, additional_env, image):
|
||||
return f"docker run --privileged --volume={workspace}:/workspace --volume={result_path}:/output " \
|
||||
f"--volume={repo_tests_path}:/usr/share/clickhouse-test " \
|
||||
f"--cap-add syslog --cap-add sys_admin --cap-add sys_rawio " \
|
||||
f"-e PR_TO_TEST={pr_to_test} -e SHA_TO_TEST={sha_to_test} {additional_env} " \
|
||||
|
||||
def get_run_command(
|
||||
workspace,
|
||||
result_path,
|
||||
repo_tests_path,
|
||||
pr_to_test,
|
||||
sha_to_test,
|
||||
additional_env,
|
||||
image,
|
||||
):
|
||||
return (
|
||||
f"docker run --privileged --volume={workspace}:/workspace --volume={result_path}:/output "
|
||||
f"--volume={repo_tests_path}:/usr/share/clickhouse-test "
|
||||
f"--cap-add syslog --cap-add sys_admin --cap-add sys_rawio "
|
||||
f"-e PR_TO_TEST={pr_to_test} -e SHA_TO_TEST={sha_to_test} {additional_env} "
|
||||
f"{image}"
|
||||
)
|
||||
|
||||
|
||||
class RamDrive:
|
||||
def __init__(self, path, size):
|
||||
@ -37,11 +49,14 @@ class RamDrive:
|
||||
if not os.path.exists(self.path):
|
||||
os.makedirs(self.path)
|
||||
|
||||
subprocess.check_call(f"sudo mount -t tmpfs -o rw,size={self.size} tmpfs {self.path}", shell=True)
|
||||
subprocess.check_call(
|
||||
f"sudo mount -t tmpfs -o rw,size={self.size} tmpfs {self.path}", shell=True
|
||||
)
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
subprocess.check_call(f"sudo umount {self.path}", shell=True)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
temp_path = os.getenv("TEMP_PATH", os.path.abspath("."))
|
||||
@ -49,7 +64,7 @@ if __name__ == "__main__":
|
||||
repo_tests_path = os.path.join(repo_path, "tests")
|
||||
ramdrive_path = os.getenv("RAMDRIVE_PATH", os.path.join(temp_path, "ramdrive"))
|
||||
# currently unused, doesn't make tests more stable
|
||||
ramdrive_size = os.getenv("RAMDRIVE_SIZE", '0G')
|
||||
ramdrive_size = os.getenv("RAMDRIVE_SIZE", "0G")
|
||||
reports_path = os.getenv("REPORTS_PATH", "./reports")
|
||||
|
||||
check_name = sys.argv[1]
|
||||
@ -57,14 +72,14 @@ if __name__ == "__main__":
|
||||
if not os.path.exists(temp_path):
|
||||
os.makedirs(temp_path)
|
||||
|
||||
with open(os.getenv('GITHUB_EVENT_PATH'), 'r', encoding='utf-8') as event_file:
|
||||
with open(os.getenv("GITHUB_EVENT_PATH"), "r", encoding="utf-8") as event_file:
|
||||
event = json.load(event_file)
|
||||
|
||||
gh = Github(get_best_robot_token())
|
||||
pr_info = PRInfo(event)
|
||||
commit = get_commit(gh, pr_info.sha)
|
||||
|
||||
docker_env = ''
|
||||
docker_env = ""
|
||||
|
||||
docker_env += " -e S3_URL=https://s3.amazonaws.com/clickhouse-builds"
|
||||
|
||||
@ -75,13 +90,16 @@ if __name__ == "__main__":
|
||||
|
||||
task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}"
|
||||
docker_env += ' -e CHPC_ADD_REPORT_LINKS="<a href={}>Job (actions)</a> <a href={}>Tested commit</a>"'.format(
|
||||
task_url, pr_link)
|
||||
task_url, pr_link
|
||||
)
|
||||
|
||||
if 'RUN_BY_HASH_TOTAL' in os.environ:
|
||||
run_by_hash_total = int(os.getenv('RUN_BY_HASH_TOTAL'))
|
||||
run_by_hash_num = int(os.getenv('RUN_BY_HASH_NUM'))
|
||||
docker_env += f' -e CHPC_TEST_RUN_BY_HASH_TOTAL={run_by_hash_total} -e CHPC_TEST_RUN_BY_HASH_NUM={run_by_hash_num}'
|
||||
check_name_with_group = check_name + f' [{run_by_hash_num + 1}/{run_by_hash_total}]'
|
||||
if "RUN_BY_HASH_TOTAL" in os.environ:
|
||||
run_by_hash_total = int(os.getenv("RUN_BY_HASH_TOTAL"))
|
||||
run_by_hash_num = int(os.getenv("RUN_BY_HASH_NUM"))
|
||||
docker_env += f" -e CHPC_TEST_RUN_BY_HASH_TOTAL={run_by_hash_total} -e CHPC_TEST_RUN_BY_HASH_NUM={run_by_hash_num}"
|
||||
check_name_with_group = (
|
||||
check_name + f" [{run_by_hash_num + 1}/{run_by_hash_total}]"
|
||||
)
|
||||
else:
|
||||
check_name_with_group = check_name
|
||||
|
||||
@ -92,12 +110,20 @@ if __name__ == "__main__":
|
||||
|
||||
docker_image = get_image_with_version(reports_path, IMAGE_NAME)
|
||||
|
||||
#with RamDrive(ramdrive_path, ramdrive_size):
|
||||
# with RamDrive(ramdrive_path, ramdrive_size):
|
||||
result_path = ramdrive_path
|
||||
if not os.path.exists(result_path):
|
||||
os.makedirs(result_path)
|
||||
|
||||
run_command = get_run_command(result_path, result_path, repo_tests_path, pr_info.number, pr_info.sha, docker_env, docker_image)
|
||||
run_command = get_run_command(
|
||||
result_path,
|
||||
result_path,
|
||||
repo_tests_path,
|
||||
pr_info.number,
|
||||
pr_info.sha,
|
||||
docker_env,
|
||||
docker_image,
|
||||
)
|
||||
logging.info("Going to run command %s", run_command)
|
||||
run_log_path = os.path.join(temp_path, "runlog.log")
|
||||
with TeePopen(run_command, run_log_path) as process:
|
||||
@ -110,74 +136,83 @@ if __name__ == "__main__":
|
||||
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
|
||||
|
||||
paths = {
|
||||
'compare.log': os.path.join(result_path, 'compare.log'),
|
||||
'output.7z': os.path.join(result_path, 'output.7z'),
|
||||
'report.html': os.path.join(result_path, 'report.html'),
|
||||
'all-queries.html': os.path.join(result_path, 'all-queries.html'),
|
||||
'queries.rep': os.path.join(result_path, 'queries.rep'),
|
||||
'all-query-metrics.tsv': os.path.join(result_path, 'report/all-query-metrics.tsv'),
|
||||
'runlog.log': run_log_path,
|
||||
"compare.log": os.path.join(result_path, "compare.log"),
|
||||
"output.7z": os.path.join(result_path, "output.7z"),
|
||||
"report.html": os.path.join(result_path, "report.html"),
|
||||
"all-queries.html": os.path.join(result_path, "all-queries.html"),
|
||||
"queries.rep": os.path.join(result_path, "queries.rep"),
|
||||
"all-query-metrics.tsv": os.path.join(
|
||||
result_path, "report/all-query-metrics.tsv"
|
||||
),
|
||||
"runlog.log": run_log_path,
|
||||
}
|
||||
|
||||
check_name_prefix = check_name_with_group.lower().replace(' ', '_').replace('(', '_').replace(')', '_').replace(',', '_')
|
||||
s3_prefix = f'{pr_info.number}/{pr_info.sha}/{check_name_prefix}/'
|
||||
s3_helper = S3Helper('https://s3.amazonaws.com')
|
||||
check_name_prefix = (
|
||||
check_name_with_group.lower()
|
||||
.replace(" ", "_")
|
||||
.replace("(", "_")
|
||||
.replace(")", "_")
|
||||
.replace(",", "_")
|
||||
)
|
||||
s3_prefix = f"{pr_info.number}/{pr_info.sha}/{check_name_prefix}/"
|
||||
s3_helper = S3Helper("https://s3.amazonaws.com")
|
||||
for file in paths:
|
||||
try:
|
||||
paths[file] = s3_helper.upload_test_report_to_s3(paths[file],
|
||||
s3_prefix + file)
|
||||
paths[file] = s3_helper.upload_test_report_to_s3(
|
||||
paths[file], s3_prefix + file
|
||||
)
|
||||
except Exception:
|
||||
paths[file] = ''
|
||||
paths[file] = ""
|
||||
traceback.print_exc()
|
||||
|
||||
# Upload all images and flamegraphs to S3
|
||||
try:
|
||||
s3_helper.upload_test_folder_to_s3(
|
||||
os.path.join(result_path, 'images'),
|
||||
s3_prefix + 'images'
|
||||
os.path.join(result_path, "images"), s3_prefix + "images"
|
||||
)
|
||||
except Exception:
|
||||
traceback.print_exc()
|
||||
|
||||
# Try to fetch status from the report.
|
||||
status = ''
|
||||
message = ''
|
||||
status = ""
|
||||
message = ""
|
||||
try:
|
||||
report_text = open(os.path.join(result_path, 'report.html'), 'r').read()
|
||||
status_match = re.search('<!--[ ]*status:(.*)-->', report_text)
|
||||
message_match = re.search('<!--[ ]*message:(.*)-->', report_text)
|
||||
report_text = open(os.path.join(result_path, "report.html"), "r").read()
|
||||
status_match = re.search("<!--[ ]*status:(.*)-->", report_text)
|
||||
message_match = re.search("<!--[ ]*message:(.*)-->", report_text)
|
||||
if status_match:
|
||||
status = status_match.group(1).strip()
|
||||
if message_match:
|
||||
message = message_match.group(1).strip()
|
||||
|
||||
# TODO: Remove me, always green mode for the first time
|
||||
status = 'success'
|
||||
status = "success"
|
||||
except Exception:
|
||||
traceback.print_exc()
|
||||
status = 'failure'
|
||||
message = 'Failed to parse the report.'
|
||||
status = "failure"
|
||||
message = "Failed to parse the report."
|
||||
|
||||
if not status:
|
||||
status = 'failure'
|
||||
message = 'No status in report.'
|
||||
status = "failure"
|
||||
message = "No status in report."
|
||||
elif not message:
|
||||
status = 'failure'
|
||||
message = 'No message in report.'
|
||||
status = "failure"
|
||||
message = "No message in report."
|
||||
|
||||
report_url = task_url
|
||||
|
||||
if paths['runlog.log']:
|
||||
report_url = paths['runlog.log']
|
||||
if paths["runlog.log"]:
|
||||
report_url = paths["runlog.log"]
|
||||
|
||||
if paths['compare.log']:
|
||||
report_url = paths['compare.log']
|
||||
if paths["compare.log"]:
|
||||
report_url = paths["compare.log"]
|
||||
|
||||
if paths['output.7z']:
|
||||
report_url = paths['output.7z']
|
||||
if paths["output.7z"]:
|
||||
report_url = paths["output.7z"]
|
||||
|
||||
if paths['report.html']:
|
||||
report_url = paths['report.html']
|
||||
if paths["report.html"]:
|
||||
report_url = paths["report.html"]
|
||||
|
||||
|
||||
post_commit_status(gh, pr_info.sha, check_name_with_group, message, status, report_url)
|
||||
post_commit_status(
|
||||
gh, pr_info.sha, check_name_with_group, message, status, report_url
|
||||
)
|
||||
|
@ -92,27 +92,27 @@ HTML_TEST_PART = """
|
||||
</table>
|
||||
"""
|
||||
|
||||
BASE_HEADERS = ['Test name', 'Test status']
|
||||
BASE_HEADERS = ["Test name", "Test status"]
|
||||
|
||||
|
||||
class ReportColorTheme:
|
||||
class ReportColor:
|
||||
yellow = '#FFB400'
|
||||
red = '#F00'
|
||||
green = '#0A0'
|
||||
blue = '#00B4FF'
|
||||
yellow = "#FFB400"
|
||||
red = "#F00"
|
||||
green = "#0A0"
|
||||
blue = "#00B4FF"
|
||||
|
||||
default = (ReportColor.green, ReportColor.red, ReportColor.yellow)
|
||||
bugfixcheck = (ReportColor.yellow, ReportColor.blue, ReportColor.blue)
|
||||
|
||||
|
||||
def _format_header(header, branch_name, branch_url=None):
|
||||
result = ' '.join([w.capitalize() for w in header.split(' ')])
|
||||
result = " ".join([w.capitalize() for w in header.split(" ")])
|
||||
result = result.replace("Clickhouse", "ClickHouse")
|
||||
result = result.replace("clickhouse", "ClickHouse")
|
||||
if 'ClickHouse' not in result:
|
||||
result = 'ClickHouse ' + result
|
||||
result += ' for '
|
||||
if "ClickHouse" not in result:
|
||||
result = "ClickHouse " + result
|
||||
result += " for "
|
||||
if branch_url:
|
||||
result += '<a href="{url}">{name}</a>'.format(url=branch_url, name=branch_name)
|
||||
else:
|
||||
@ -121,27 +121,27 @@ def _format_header(header, branch_name, branch_url=None):
|
||||
|
||||
|
||||
def _get_status_style(status, colortheme=None):
|
||||
ok_statuses = ('OK', 'success', 'PASSED')
|
||||
fail_statuses = ('FAIL', 'failure', 'error', 'FAILED', 'Timeout')
|
||||
ok_statuses = ("OK", "success", "PASSED")
|
||||
fail_statuses = ("FAIL", "failure", "error", "FAILED", "Timeout")
|
||||
|
||||
if colortheme is None:
|
||||
colortheme = ReportColorTheme.default
|
||||
|
||||
style = "font-weight: bold;"
|
||||
if status in ok_statuses:
|
||||
style += f'color: {colortheme[0]};'
|
||||
style += f"color: {colortheme[0]};"
|
||||
elif status in fail_statuses:
|
||||
style += f'color: {colortheme[1]};'
|
||||
style += f"color: {colortheme[1]};"
|
||||
else:
|
||||
style += f'color: {colortheme[2]};'
|
||||
style += f"color: {colortheme[2]};"
|
||||
return style
|
||||
|
||||
|
||||
def _get_html_url_name(url):
|
||||
if isinstance(url, str):
|
||||
return os.path.basename(url).replace('%2B', '+').replace('%20', ' ')
|
||||
return os.path.basename(url).replace("%2B", "+").replace("%20", " ")
|
||||
if isinstance(url, tuple):
|
||||
return url[1].replace('%2B', '+').replace('%20', ' ')
|
||||
return url[1].replace("%2B", "+").replace("%20", " ")
|
||||
return None
|
||||
|
||||
|
||||
@ -153,11 +153,24 @@ def _get_html_url(url):
|
||||
if isinstance(url, tuple):
|
||||
href, name = url[0], _get_html_url_name(url)
|
||||
if href and name:
|
||||
return '<a href="{href}">{name}</a>'.format(href=href, name=_get_html_url_name(url))
|
||||
return ''
|
||||
return '<a href="{href}">{name}</a>'.format(
|
||||
href=href, name=_get_html_url_name(url)
|
||||
)
|
||||
return ""
|
||||
|
||||
|
||||
def create_test_html_report(header, test_result, raw_log_url, task_url, branch_url, branch_name, commit_url, additional_urls=None, with_raw_logs=False, statuscolors=None):
|
||||
def create_test_html_report(
|
||||
header,
|
||||
test_result,
|
||||
raw_log_url,
|
||||
task_url,
|
||||
branch_url,
|
||||
branch_name,
|
||||
commit_url,
|
||||
additional_urls=None,
|
||||
with_raw_logs=False,
|
||||
statuscolors=None,
|
||||
):
|
||||
if additional_urls is None:
|
||||
additional_urls = []
|
||||
|
||||
@ -181,9 +194,9 @@ def create_test_html_report(header, test_result, raw_log_url, task_url, branch_u
|
||||
has_test_logs = True
|
||||
|
||||
row = "<tr>"
|
||||
is_fail = test_status in ('FAIL', 'FLAKY')
|
||||
is_fail = test_status in ("FAIL", "FLAKY")
|
||||
if is_fail and with_raw_logs and test_logs is not None:
|
||||
row = "<tr class=\"failed\">"
|
||||
row = '<tr class="failed">'
|
||||
row += "<td>" + test_name + "</td>"
|
||||
style = _get_status_style(test_status, colortheme=statuscolors)
|
||||
|
||||
@ -193,7 +206,13 @@ def create_test_html_report(header, test_result, raw_log_url, task_url, branch_u
|
||||
num_fails = num_fails + 1
|
||||
is_fail_id = 'id="fail' + str(num_fails) + '" '
|
||||
|
||||
row += '<td ' + is_fail_id + 'style="{}">'.format(style) + test_status + "</td>"
|
||||
row += (
|
||||
"<td "
|
||||
+ is_fail_id
|
||||
+ 'style="{}">'.format(style)
|
||||
+ test_status
|
||||
+ "</td>"
|
||||
)
|
||||
|
||||
if test_time is not None:
|
||||
row += "<td>" + test_time + "</td>"
|
||||
@ -205,24 +224,26 @@ def create_test_html_report(header, test_result, raw_log_url, task_url, branch_u
|
||||
row += "</tr>"
|
||||
rows_part += row
|
||||
if test_logs is not None and with_raw_logs:
|
||||
row = "<tr class=\"failed-content\">"
|
||||
row = '<tr class="failed-content">'
|
||||
# TODO: compute colspan too
|
||||
row += "<td colspan=\"3\"><pre>" + test_logs + "</pre></td>"
|
||||
row += '<td colspan="3"><pre>' + test_logs + "</pre></td>"
|
||||
row += "</tr>"
|
||||
rows_part += row
|
||||
|
||||
headers = BASE_HEADERS
|
||||
if has_test_time:
|
||||
headers.append('Test time, sec.')
|
||||
headers.append("Test time, sec.")
|
||||
if has_test_logs and not with_raw_logs:
|
||||
headers.append('Logs')
|
||||
headers.append("Logs")
|
||||
|
||||
headers = ''.join(['<th>' + h + '</th>' for h in headers])
|
||||
headers = "".join(["<th>" + h + "</th>" for h in headers])
|
||||
test_part = HTML_TEST_PART.format(headers=headers, rows=rows_part)
|
||||
else:
|
||||
test_part = ""
|
||||
|
||||
additional_html_urls = ' '.join([_get_html_url(url) for url in sorted(additional_urls, key=_get_html_url_name)])
|
||||
additional_html_urls = " ".join(
|
||||
[_get_html_url(url) for url in sorted(additional_urls, key=_get_html_url_name)]
|
||||
)
|
||||
|
||||
result = HTML_BASE_TEST_TEMPLATE.format(
|
||||
title=_format_header(header, branch_name),
|
||||
@ -233,7 +254,7 @@ def create_test_html_report(header, test_result, raw_log_url, task_url, branch_u
|
||||
test_part=test_part,
|
||||
branch_name=branch_name,
|
||||
commit_url=commit_url,
|
||||
additional_urls=additional_html_urls
|
||||
additional_urls=additional_html_urls,
|
||||
)
|
||||
return result
|
||||
|
||||
@ -297,9 +318,20 @@ tr:hover td {{filter: brightness(95%);}}
|
||||
LINK_TEMPLATE = '<a href="{url}">{text}</a>'
|
||||
|
||||
|
||||
def create_build_html_report(header, build_results, build_logs_urls, artifact_urls_list, task_url, branch_url, branch_name, commit_url):
|
||||
def create_build_html_report(
|
||||
header,
|
||||
build_results,
|
||||
build_logs_urls,
|
||||
artifact_urls_list,
|
||||
task_url,
|
||||
branch_url,
|
||||
branch_name,
|
||||
commit_url,
|
||||
):
|
||||
rows = ""
|
||||
for (build_result, build_log_url, artifact_urls) in zip(build_results, build_logs_urls, artifact_urls_list):
|
||||
for (build_result, build_log_url, artifact_urls) in zip(
|
||||
build_results, build_logs_urls, artifact_urls_list
|
||||
):
|
||||
row = "<tr>"
|
||||
row += "<td>{}</td>".format(build_result.compiler)
|
||||
if build_result.build_type:
|
||||
@ -326,18 +358,20 @@ def create_build_html_report(header, build_results, build_logs_urls, artifact_ur
|
||||
if build_result.elapsed_seconds:
|
||||
delta = datetime.timedelta(seconds=build_result.elapsed_seconds)
|
||||
else:
|
||||
delta = 'unknown'
|
||||
delta = "unknown"
|
||||
|
||||
row += '<td>{}</td>'.format(str(delta))
|
||||
row += "<td>{}</td>".format(str(delta))
|
||||
|
||||
links = ""
|
||||
link_separator = "<br/>"
|
||||
if artifact_urls:
|
||||
for artifact_url in artifact_urls:
|
||||
links += LINK_TEMPLATE.format(text=_get_html_url_name(artifact_url), url=artifact_url)
|
||||
links += LINK_TEMPLATE.format(
|
||||
text=_get_html_url_name(artifact_url), url=artifact_url
|
||||
)
|
||||
links += link_separator
|
||||
if links:
|
||||
links = links[:-len(link_separator)]
|
||||
links = links[: -len(link_separator)]
|
||||
row += "<td>{}</td>".format(links)
|
||||
|
||||
row += "</tr>"
|
||||
@ -348,4 +382,5 @@ def create_build_html_report(header, build_results, build_logs_urls, artifact_ur
|
||||
rows=rows,
|
||||
task_url=task_url,
|
||||
branch_name=branch_name,
|
||||
commit_url=commit_url)
|
||||
commit_url=commit_url,
|
||||
)
|
||||
|
@ -2,6 +2,7 @@

from commit_status_helper import get_commit


def _filter_statuses(statuses):
"""
Squash statuses to latest state
@ -19,7 +20,6 @@ def _filter_statuses(statuses):


class RerunHelper:

def __init__(self, gh, pr_info, check_name):
self.gh = gh
self.pr_info = pr_info
@ -30,6 +30,9 @@ class RerunHelper:
def is_already_finished_by_status(self):
# currently we agree even for failed statuses
for status in self.statuses:
if self.check_name in status.context and status.state in ('success', 'failure'):
if self.check_name in status.context and status.state in (
"success",
"failure",
):
return True
return False

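RerunHelper is built from objects the check scripts above already have; a hypothetical usage sketch (only the constructor and method names come from this diff, the skip-and-exit pattern is an assumption):

import logging
import sys

# gh, pr_info and check_name_with_group are created earlier in the check
# scripts shown above; this only illustrates the intended short-circuit.
rerun_helper = RerunHelper(gh, pr_info, check_name_with_group)
if rerun_helper.is_already_finished_by_status():
    logging.info("Check already has a terminal GitHub status, exiting")
    sys.exit(0)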
@ -34,30 +34,59 @@ def _flatten_list(lst):
|
||||
|
||||
class S3Helper:
|
||||
def __init__(self, host):
|
||||
self.session = boto3.session.Session(region_name='us-east-1')
|
||||
self.client = self.session.client('s3', endpoint_url=host)
|
||||
self.session = boto3.session.Session(region_name="us-east-1")
|
||||
self.client = self.session.client("s3", endpoint_url=host)
|
||||
|
||||
def _upload_file_to_s3(self, bucket_name, file_path, s3_path):
|
||||
logging.debug("Start uploading %s to bucket=%s path=%s", file_path, bucket_name, s3_path)
|
||||
logging.debug(
|
||||
"Start uploading %s to bucket=%s path=%s", file_path, bucket_name, s3_path
|
||||
)
|
||||
metadata = {}
|
||||
if os.path.getsize(file_path) < 64 * 1024 * 1024:
|
||||
if s3_path.endswith("txt") or s3_path.endswith("log") or s3_path.endswith("err") or s3_path.endswith("out"):
|
||||
metadata['ContentType'] = "text/plain; charset=utf-8"
|
||||
logging.info("Content type %s for file path %s", "text/plain; charset=utf-8", file_path)
|
||||
if (
|
||||
s3_path.endswith("txt")
|
||||
or s3_path.endswith("log")
|
||||
or s3_path.endswith("err")
|
||||
or s3_path.endswith("out")
|
||||
):
|
||||
metadata["ContentType"] = "text/plain; charset=utf-8"
|
||||
logging.info(
|
||||
"Content type %s for file path %s",
|
||||
"text/plain; charset=utf-8",
|
||||
file_path,
|
||||
)
|
||||
elif s3_path.endswith("html"):
|
||||
metadata['ContentType'] = "text/html; charset=utf-8"
|
||||
logging.info("Content type %s for file path %s", "text/html; charset=utf-8", file_path)
|
||||
metadata["ContentType"] = "text/html; charset=utf-8"
|
||||
logging.info(
|
||||
"Content type %s for file path %s",
|
||||
"text/html; charset=utf-8",
|
||||
file_path,
|
||||
)
|
||||
elif s3_path.endswith("css"):
|
||||
metadata['ContentType'] = "text/css; charset=utf-8"
|
||||
logging.info("Content type %s for file path %s", "text/css; charset=utf-8", file_path)
|
||||
metadata["ContentType"] = "text/css; charset=utf-8"
|
||||
logging.info(
|
||||
"Content type %s for file path %s",
|
||||
"text/css; charset=utf-8",
|
||||
file_path,
|
||||
)
|
||||
elif s3_path.endswith("js"):
|
||||
metadata['ContentType'] = "text/javascript; charset=utf-8"
|
||||
logging.info("Content type %s for file path %s", "text/css; charset=utf-8", file_path)
|
||||
metadata["ContentType"] = "text/javascript; charset=utf-8"
|
||||
logging.info(
|
||||
"Content type %s for file path %s",
|
"text/css; charset=utf-8",
file_path,
)
else:
logging.info("No content type provied for %s", file_path)
else:
if re.search(r'\.(txt|log|err|out)$', s3_path) or re.search(r'\.log\..*(?<!\.gz)$', s3_path):
logging.info("Going to compress file log file %s to %s", file_path, file_path + ".gz")
if re.search(r"\.(txt|log|err|out)$", s3_path) or re.search(
r"\.log\..*(?<!\.gz)$", s3_path
):
logging.info(
"Going to compress file log file %s to %s",
file_path,
file_path + ".gz",
)
compress_file_fast(file_path, file_path + ".gz")
file_path += ".gz"
s3_path += ".gz"
@ -69,14 +98,21 @@ class S3Helper:
logging.info("Upload %s to %s. Meta: %s", file_path, s3_path, metadata)
# last two replacements are specifics of AWS urls:
# https://jamesd3142.wordpress.com/2018/02/28/amazon-s3-and-the-plus-symbol/
return "https://s3.amazonaws.com/{bucket}/{path}".format(bucket=bucket_name, path=s3_path) \
.replace('+', '%2B').replace(' ', '%20')
return (
"https://s3.amazonaws.com/{bucket}/{path}".format(
bucket=bucket_name, path=s3_path
)
.replace("+", "%2B")
.replace(" ", "%20")
)

def upload_test_report_to_s3(self, file_path, s3_path):
if CI:
return self._upload_file_to_s3(S3_TEST_REPORTS_BUCKET, file_path, s3_path)
else:
return S3Helper.copy_file_to_local(S3_TEST_REPORTS_BUCKET, file_path, s3_path)
return S3Helper.copy_file_to_local(
S3_TEST_REPORTS_BUCKET, file_path, s3_path
)

def upload_build_file_to_s3(self, file_path, s3_path):
if CI:
@ -96,6 +132,7 @@ class S3Helper:
counter = 0
t = time.time()
sum_time = 0

def upload_task(file_path):
nonlocal counter
nonlocal t
@ -104,16 +141,18 @@ class S3Helper:
s3_path = file_path.replace(dir_path, s3_dir_path)
metadata = {}
if s3_path.endswith("html"):
metadata['ContentType'] = "text/html; charset=utf-8"
metadata["ContentType"] = "text/html; charset=utf-8"
elif s3_path.endswith("css"):
metadata['ContentType'] = "text/css; charset=utf-8"
metadata["ContentType"] = "text/css; charset=utf-8"
elif s3_path.endswith("js"):
metadata['ContentType'] = "text/javascript; charset=utf-8"
metadata["ContentType"] = "text/javascript; charset=utf-8"

# Retry
for i in range(5):
try:
self.client.upload_file(file_path, bucket_name, s3_path, ExtraArgs=metadata)
self.client.upload_file(
file_path, bucket_name, s3_path, ExtraArgs=metadata
)
break
except Exception as ex:
if i == 4:
@ -123,11 +162,22 @@ class S3Helper:
counter += 1
if counter % 1000 == 0:
sum_time += int(time.time() - t)
print("Uploaded", counter, "-", int(time.time() - t), "s", "sum time", sum_time, "s")
print(
"Uploaded",
counter,
"-",
int(time.time() - t),
"s",
"sum time",
sum_time,
"s",
)
t = time.time()
except Exception as ex:
logging.critical("Failed to upload file, expcetion %s", ex)
return "https://s3.amazonaws.com/{bucket}/{path}".format(bucket=bucket_name, path=s3_path)
return "https://s3.amazonaws.com/{bucket}/{path}".format(
bucket=bucket_name, path=s3_path
)

p = Pool(256)

@ -136,8 +186,20 @@ class S3Helper:
logging.basicConfig(level=logging.INFO)
return result

def _upload_folder_to_s3(self, folder_path, s3_folder_path, bucket_name, keep_dirs_in_s3_path, upload_symlinks):
logging.info("Upload folder '%s' to bucket=%s of s3 folder '%s'", folder_path, bucket_name, s3_folder_path)
def _upload_folder_to_s3(
self,
folder_path,
s3_folder_path,
bucket_name,
keep_dirs_in_s3_path,
upload_symlinks,
):
logging.info(
"Upload folder '%s' to bucket=%s of s3 folder '%s'",
folder_path,
bucket_name,
s3_folder_path,
)
if not os.path.exists(folder_path):
return []
files = os.listdir(folder_path)
@ -154,44 +216,81 @@ class S3Helper:
full_s3_path = s3_folder_path

if os.path.isdir(full_fs_path):
return self._upload_folder_to_s3(full_fs_path, full_s3_path, bucket_name, keep_dirs_in_s3_path,
upload_symlinks)
return self._upload_folder_to_s3(
full_fs_path,
full_s3_path,
bucket_name,
keep_dirs_in_s3_path,
upload_symlinks,
)

if os.path.islink(full_fs_path):
if upload_symlinks:
if CI:
return self._upload_file_to_s3(bucket_name, full_fs_path, full_s3_path + "/" + file_name)
return self._upload_file_to_s3(
bucket_name, full_fs_path, full_s3_path + "/" + file_name
)
else:
return S3Helper.copy_file_to_local(bucket_name, full_fs_path, full_s3_path + "/" + file_name)
return S3Helper.copy_file_to_local(
bucket_name, full_fs_path, full_s3_path + "/" + file_name
)
return []

if CI:
return self._upload_file_to_s3(bucket_name, full_fs_path, full_s3_path + "/" + file_name)
return self._upload_file_to_s3(
bucket_name, full_fs_path, full_s3_path + "/" + file_name
)
else:
return S3Helper.copy_file_to_local(bucket_name, full_fs_path, full_s3_path + "/" + file_name)
return S3Helper.copy_file_to_local(
bucket_name, full_fs_path, full_s3_path + "/" + file_name
)

return sorted(_flatten_list(list(p.map(task, files))))

def upload_build_folder_to_s3(self, folder_path, s3_folder_path, keep_dirs_in_s3_path=True, upload_symlinks=True):
return self._upload_folder_to_s3(folder_path, s3_folder_path, S3_BUILDS_BUCKET, keep_dirs_in_s3_path,
upload_symlinks)
def upload_build_folder_to_s3(
self,
folder_path,
s3_folder_path,
keep_dirs_in_s3_path=True,
upload_symlinks=True,
):
return self._upload_folder_to_s3(
folder_path,
s3_folder_path,
S3_BUILDS_BUCKET,
keep_dirs_in_s3_path,
upload_symlinks,
)

def upload_test_folder_to_s3(self, folder_path, s3_folder_path, keep_dirs_in_s3_path=True, upload_symlinks=True):
return self._upload_folder_to_s3(folder_path, s3_folder_path, S3_TEST_REPORTS_BUCKET, keep_dirs_in_s3_path,
upload_symlinks)
def upload_test_folder_to_s3(
self,
folder_path,
s3_folder_path,
keep_dirs_in_s3_path=True,
upload_symlinks=True,
):
return self._upload_folder_to_s3(
folder_path,
s3_folder_path,
S3_TEST_REPORTS_BUCKET,
keep_dirs_in_s3_path,
upload_symlinks,
)

def list_prefix(self, s3_prefix_path, bucket=S3_BUILDS_BUCKET):
objects = self.client.list_objects_v2(Bucket=bucket, Prefix=s3_prefix_path)
result = []
if 'Contents' in objects:
for obj in objects['Contents']:
result.append(obj['Key'])
if "Contents" in objects:
for obj in objects["Contents"]:
result.append(obj["Key"])

return result

@staticmethod
def copy_file_to_local(bucket_name, file_path, s3_path):
local_path = os.path.abspath(os.path.join(RUNNER_TEMP, 's3', bucket_name, s3_path))
local_path = os.path.abspath(
os.path.join(RUNNER_TEMP, "s3", bucket_name, s3_path)
)
local_dir = os.path.dirname(local_path)
if not os.path.exists(local_dir):
os.makedirs(local_dir)

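# A minimal standalone sketch of the upload pattern shown above: guess a
# ContentType from the file suffix and retry the boto3 upload a few times.
# Bucket, key and retry count are illustrative placeholders, not values taken
# from the ClickHouse CI scripts.
import logging
import mimetypes

import boto3


def upload_with_content_type(file_path, bucket, key, retries=5):
    client = boto3.client("s3")
    content_type, _ = mimetypes.guess_type(file_path)
    extra_args = {"ContentType": content_type} if content_type else {}
    for attempt in range(1, retries + 1):
        try:
            # boto3 signature: upload_file(Filename, Bucket, Key, ExtraArgs=...)
            client.upload_file(file_path, bucket, key, ExtraArgs=extra_args)
            return "https://s3.amazonaws.com/{}/{}".format(bucket, key)
        except Exception as ex:
            logging.warning("Upload attempt %s failed: %s", attempt, ex)
            if attempt == retries:
                raise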
@ -23,19 +23,20 @@ from rerun_helper import RerunHelper
DOCKER_IMAGE = "clickhouse/split-build-smoke-test"
DOWNLOAD_RETRIES_COUNT = 5
RESULT_LOG_NAME = "run.log"
CHECK_NAME = 'Split build smoke test (actions)'
CHECK_NAME = "Split build smoke test (actions)"


def process_result(result_folder, server_log_folder):
status = "success"
description = 'Server started and responded'
description = "Server started and responded"
summary = [("Smoke test", "OK")]
with open(os.path.join(result_folder, RESULT_LOG_NAME), 'r') as run_log:
lines = run_log.read().split('\n')
if not lines or lines[0].strip() != 'OK':
with open(os.path.join(result_folder, RESULT_LOG_NAME), "r") as run_log:
lines = run_log.read().split("\n")
if not lines or lines[0].strip() != "OK":
status = "failure"
logging.info("Lines is not ok: %s", str('\n'.join(lines)))
logging.info("Lines is not ok: %s", str("\n".join(lines)))
summary = [("Smoke test", "FAIL")]
description = 'Server failed to respond, see result in logs'
description = "Server failed to respond, see result in logs"

result_logs = []
server_log_path = os.path.join(server_log_folder, "clickhouse-server.log")
@ -43,17 +44,25 @@ def process_result(result_folder, server_log_folder):
client_stderr_log_path = os.path.join(result_folder, "clientstderr.log")
run_log_path = os.path.join(result_folder, RESULT_LOG_NAME)

for path in [server_log_path, stderr_log_path, client_stderr_log_path, run_log_path]:
for path in [
server_log_path,
stderr_log_path,
client_stderr_log_path,
run_log_path,
]:
if os.path.exists(path):
result_logs.append(path)

return status, description, summary, result_logs


def get_run_command(build_path, result_folder, server_log_folder, docker_image):
return f"docker run --network=host --volume={build_path}:/package_folder" \
f" --volume={server_log_folder}:/var/log/clickhouse-server" \
f" --volume={result_folder}:/test_output" \
f" {docker_image} >{result_folder}/{RESULT_LOG_NAME}"
return (
f"docker run --network=host --volume={build_path}:/package_folder"
f" --volume={server_log_folder}:/var/log/clickhouse-server"
f" --volume={result_folder}:/test_output"
f" {docker_image} >{result_folder}/{RESULT_LOG_NAME}"
)


if __name__ == "__main__":
@ -76,8 +85,8 @@ if __name__ == "__main__":

for root, _, files in os.walk(reports_path):
for f in files:
if f == 'changed_images.json':
images_path = os.path.join(root, 'changed_images.json')
if f == "changed_images.json":
images_path = os.path.join(root, "changed_images.json")
break

docker_image = get_image_with_version(reports_path, DOCKER_IMAGE)
@ -96,7 +105,9 @@ if __name__ == "__main__":
if not os.path.exists(result_path):
os.makedirs(result_path)

run_command = get_run_command(packages_path, result_path, server_log_path, docker_image)
run_command = get_run_command(
packages_path, result_path, server_log_path, docker_image
)

logging.info("Going to run command %s", run_command)
with subprocess.Popen(run_command, shell=True) as process:
@ -110,13 +121,30 @@ if __name__ == "__main__":
print("Result path", os.listdir(result_path))
print("Server log path", os.listdir(server_log_path))

state, description, test_results, additional_logs = process_result(result_path, server_log_path)
state, description, test_results, additional_logs = process_result(
result_path, server_log_path
)

ch_helper = ClickHouseHelper()
s3_helper = S3Helper('https://s3.amazonaws.com')
report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_logs, CHECK_NAME)
s3_helper = S3Helper("https://s3.amazonaws.com")
report_url = upload_results(
s3_helper,
pr_info.number,
pr_info.sha,
test_results,
additional_logs,
CHECK_NAME,
)
print(f"::notice ::Report url: {report_url}")
post_commit_status(gh, pr_info.sha, CHECK_NAME, description, state, report_url)

prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, CHECK_NAME)
prepared_events = prepare_tests_results_for_clickhouse(
pr_info,
test_results,
state,
stopwatch.duration_seconds,
stopwatch.start_time_str,
report_url,
CHECK_NAME,
)
ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)

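# Sketch of the result-log check that the smoke test above performs: the first
# line of run.log must be exactly "OK", anything else is treated as a failed
# run. The paths and names below are assumptions for illustration, not the
# exact CI layout.
import os

RESULT_LOG_NAME = "run.log"


def summarize_run(result_folder):
    run_log_path = os.path.join(result_folder, RESULT_LOG_NAME)
    with open(run_log_path, "r") as run_log:
        lines = run_log.read().split("\n")
    if not lines or lines[0].strip() != "OK":
        # Mirrors the failure branch of process_result() above.
        return "failure", "Server failed to respond, see result in logs"
    return "success", "Server started and responded"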
@ -27,15 +27,19 @@ class SSHAgent:
|
||||
self._env_backup["SSH_OPTIONS"] = os.environ.get("SSH_OPTIONS")
|
||||
|
||||
# set ENV from stdout of ssh-agent
|
||||
for line in self._run(['ssh-agent']).splitlines():
|
||||
for line in self._run(["ssh-agent"]).splitlines():
|
||||
name, _, value = line.partition(b"=")
|
||||
if _ == b"=":
|
||||
value = value.split(b";", 1)[0]
|
||||
self._env[name.decode()] = value.decode()
|
||||
os.environ[name.decode()] = value.decode()
|
||||
|
||||
ssh_options = "," + os.environ["SSH_OPTIONS"] if os.environ.get("SSH_OPTIONS") else ""
|
||||
os.environ["SSH_OPTIONS"] = f"{ssh_options}UserKnownHostsFile=/dev/null,StrictHostKeyChecking=no"
|
||||
ssh_options = (
|
||||
"," + os.environ["SSH_OPTIONS"] if os.environ.get("SSH_OPTIONS") else ""
|
||||
)
|
||||
os.environ[
|
||||
"SSH_OPTIONS"
|
||||
] = f"{ssh_options}UserKnownHostsFile=/dev/null,StrictHostKeyChecking=no"
|
||||
|
||||
def add(self, key):
|
||||
key_pub = self._key_pub(key)
|
||||
@ -89,7 +93,13 @@ class SSHAgent:
|
||||
@staticmethod
|
||||
def _run(cmd, stdin=None):
|
||||
shell = isinstance(cmd, str)
|
||||
with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE if stdin else None, shell=shell) as p:
|
||||
with subprocess.Popen(
|
||||
cmd,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
stdin=subprocess.PIPE if stdin else None,
|
||||
shell=shell,
|
||||
) as p:
|
||||
stdout, stderr = p.communicate(stdin)
|
||||
|
||||
if stdout.strip().decode() == "The agent has no identities.":
|
||||
@ -101,6 +111,7 @@ class SSHAgent:
|
||||
|
||||
return stdout
|
||||
|
||||
|
||||
class SSHKey:
|
||||
def __init__(self, key_name=None, key_value=None):
|
||||
if key_name is None and key_value is None:
|
||||
|
@ -2,7 +2,8 @@
|
||||
|
||||
import datetime
|
||||
|
||||
class Stopwatch():
|
||||
|
||||
class Stopwatch:
|
||||
def __init__(self):
|
||||
self.start_time = datetime.datetime.utcnow()
|
||||
self.start_time_str_value = self.start_time.strftime("%Y-%m-%d %H:%M:%S")
|
||||
|
@ -8,18 +8,19 @@ import json
|
||||
import time
|
||||
from collections import namedtuple
|
||||
|
||||
|
||||
def get_key_and_app_from_aws():
|
||||
import boto3
|
||||
|
||||
secret_name = "clickhouse_github_secret_key"
|
||||
session = boto3.session.Session()
|
||||
client = session.client(
|
||||
service_name='secretsmanager',
|
||||
service_name="secretsmanager",
|
||||
)
|
||||
get_secret_value_response = client.get_secret_value(
|
||||
SecretId=secret_name
|
||||
)
|
||||
data = json.loads(get_secret_value_response['SecretString'])
|
||||
return data['clickhouse-app-key'], int(data['clickhouse-app-id'])
|
||||
get_secret_value_response = client.get_secret_value(SecretId=secret_name)
|
||||
data = json.loads(get_secret_value_response["SecretString"])
|
||||
return data["clickhouse-app-key"], int(data["clickhouse-app-id"])
|
||||
|
||||
|
||||
def get_installation_id(jwt_token):
|
||||
headers = {
|
||||
@ -29,117 +30,152 @@ def get_installation_id(jwt_token):
|
||||
response = requests.get("https://api.github.com/app/installations", headers=headers)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
return data[0]['id']
|
||||
return data[0]["id"]
|
||||
|
||||
|
||||
def get_access_token(jwt_token, installation_id):
|
||||
headers = {
|
||||
"Authorization": f"Bearer {jwt_token}",
|
||||
"Accept": "application/vnd.github.v3+json",
|
||||
}
|
||||
response = requests.post(f"https://api.github.com/app/installations/{installation_id}/access_tokens", headers=headers)
|
||||
response = requests.post(
|
||||
f"https://api.github.com/app/installations/{installation_id}/access_tokens",
|
||||
headers=headers,
|
||||
)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
return data['token']
|
||||
return data["token"]
|
||||
|
||||
|
||||
RunnerDescription = namedtuple('RunnerDescription', ['id', 'name', 'tags', 'offline', 'busy'])
|
||||
RunnerDescription = namedtuple(
|
||||
"RunnerDescription", ["id", "name", "tags", "offline", "busy"]
|
||||
)
|
||||
|
||||
|
||||
def list_runners(access_token):
|
||||
headers = {
|
||||
"Authorization": f"token {access_token}",
|
||||
"Accept": "application/vnd.github.v3+json",
|
||||
}
|
||||
response = requests.get("https://api.github.com/orgs/ClickHouse/actions/runners?per_page=100", headers=headers)
|
||||
response = requests.get(
|
||||
"https://api.github.com/orgs/ClickHouse/actions/runners?per_page=100",
|
||||
headers=headers,
|
||||
)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
total_runners = data['total_count']
|
||||
runners = data['runners']
|
||||
total_runners = data["total_count"]
|
||||
runners = data["runners"]
|
||||
|
||||
total_pages = int(total_runners / 100 + 1)
|
||||
for i in range(2, total_pages + 1):
|
||||
response = requests.get(f"https://api.github.com/orgs/ClickHouse/actions/runners?page={i}&per_page=100", headers=headers)
|
||||
response = requests.get(
|
||||
f"https://api.github.com/orgs/ClickHouse/actions/runners?page={i}&per_page=100",
|
||||
headers=headers,
|
||||
)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
runners += data['runners']
|
||||
runners += data["runners"]
|
||||
|
||||
print("Total runners", len(runners))
|
||||
result = []
|
||||
for runner in runners:
|
||||
tags = [tag['name'] for tag in runner['labels']]
|
||||
desc = RunnerDescription(id=runner['id'], name=runner['name'], tags=tags,
|
||||
offline=runner['status']=='offline', busy=runner['busy'])
|
||||
tags = [tag["name"] for tag in runner["labels"]]
|
||||
desc = RunnerDescription(
|
||||
id=runner["id"],
|
||||
name=runner["name"],
|
||||
tags=tags,
|
||||
offline=runner["status"] == "offline",
|
||||
busy=runner["busy"],
|
||||
)
|
||||
result.append(desc)
|
||||
return result
|
||||
|
||||
|
||||
def push_metrics_to_cloudwatch(listed_runners, namespace):
|
||||
import boto3
|
||||
client = boto3.client('cloudwatch')
|
||||
|
||||
client = boto3.client("cloudwatch")
|
||||
metrics_data = []
|
||||
busy_runners = sum(1 for runner in listed_runners if runner.busy)
|
||||
metrics_data.append({
|
||||
'MetricName': 'BusyRunners',
|
||||
'Value': busy_runners,
|
||||
'Unit': 'Count',
|
||||
})
|
||||
metrics_data.append(
|
||||
{
|
||||
"MetricName": "BusyRunners",
|
||||
"Value": busy_runners,
|
||||
"Unit": "Count",
|
||||
}
|
||||
)
|
||||
total_active_runners = sum(1 for runner in listed_runners if not runner.offline)
|
||||
metrics_data.append({
|
||||
'MetricName': 'ActiveRunners',
|
||||
'Value': total_active_runners,
|
||||
'Unit': 'Count',
|
||||
})
|
||||
metrics_data.append(
|
||||
{
|
||||
"MetricName": "ActiveRunners",
|
||||
"Value": total_active_runners,
|
||||
"Unit": "Count",
|
||||
}
|
||||
)
|
||||
total_runners = len(listed_runners)
|
||||
metrics_data.append({
|
||||
'MetricName': 'TotalRunners',
|
||||
'Value': total_runners,
|
||||
'Unit': 'Count',
|
||||
})
|
||||
metrics_data.append(
|
||||
{
|
||||
"MetricName": "TotalRunners",
|
||||
"Value": total_runners,
|
||||
"Unit": "Count",
|
||||
}
|
||||
)
|
||||
if total_active_runners == 0:
|
||||
busy_ratio = 100
|
||||
else:
|
||||
busy_ratio = busy_runners / total_active_runners * 100
|
||||
|
||||
metrics_data.append({
|
||||
'MetricName': 'BusyRunnersRatio',
|
||||
'Value': busy_ratio,
|
||||
'Unit': 'Percent',
|
||||
})
|
||||
metrics_data.append(
|
||||
{
|
||||
"MetricName": "BusyRunnersRatio",
|
||||
"Value": busy_ratio,
|
||||
"Unit": "Percent",
|
||||
}
|
||||
)
|
||||
|
||||
client.put_metric_data(Namespace='RunnersMetrics', MetricData=metrics_data)
|
||||
client.put_metric_data(Namespace="RunnersMetrics", MetricData=metrics_data)
|
||||
|
||||
|
||||
def how_many_instances_to_kill(event_data):
|
||||
data_array = event_data['CapacityToTerminate']
|
||||
data_array = event_data["CapacityToTerminate"]
|
||||
to_kill_by_zone = {}
|
||||
for av_zone in data_array:
|
||||
zone_name = av_zone['AvailabilityZone']
|
||||
to_kill = av_zone['Capacity']
|
||||
zone_name = av_zone["AvailabilityZone"]
|
||||
to_kill = av_zone["Capacity"]
|
||||
if zone_name not in to_kill_by_zone:
|
||||
to_kill_by_zone[zone_name] = 0
|
||||
|
||||
to_kill_by_zone[zone_name] += to_kill
|
||||
return to_kill_by_zone
|
||||
|
||||
|
||||
def get_candidates_to_be_killed(event_data):
|
||||
data_array = event_data['Instances']
|
||||
data_array = event_data["Instances"]
|
||||
instances_by_zone = {}
|
||||
for instance in data_array:
|
||||
zone_name = instance['AvailabilityZone']
|
||||
instance_id = instance['InstanceId']
|
||||
zone_name = instance["AvailabilityZone"]
|
||||
instance_id = instance["InstanceId"]
|
||||
if zone_name not in instances_by_zone:
|
||||
instances_by_zone[zone_name] = []
|
||||
instances_by_zone[zone_name].append(instance_id)
|
||||
|
||||
return instances_by_zone
|
||||
|
||||
|
||||
def delete_runner(access_token, runner):
|
||||
headers = {
|
||||
"Authorization": f"token {access_token}",
|
||||
"Accept": "application/vnd.github.v3+json",
|
||||
}
|
||||
|
||||
response = requests.delete(f"https://api.github.com/orgs/ClickHouse/actions/runners/{runner.id}", headers=headers)
|
||||
response = requests.delete(
|
||||
f"https://api.github.com/orgs/ClickHouse/actions/runners/{runner.id}",
|
||||
headers=headers,
|
||||
)
|
||||
response.raise_for_status()
|
||||
print(f"Response code deleting {runner.name} with id {runner.id} is {response.status_code}")
|
||||
print(
|
||||
f"Response code deleting {runner.name} with id {runner.id} is {response.status_code}"
|
||||
)
|
||||
return response.status_code == 204
|
||||
|
||||
|
||||
@ -166,12 +202,16 @@ def main(github_secret_key, github_app_id, event):
|
||||
num_to_kill = to_kill_by_zone[zone]
|
||||
candidates = instances_by_zone[zone]
|
||||
if num_to_kill > len(candidates):
|
||||
raise Exception(f"Required to kill {num_to_kill}, but have only {len(candidates)} candidates in AV {zone}")
|
||||
raise Exception(
|
||||
f"Required to kill {num_to_kill}, but have only {len(candidates)} candidates in AV {zone}"
|
||||
)
|
||||
|
||||
delete_for_av = []
|
||||
for candidate in candidates:
|
||||
if candidate not in set([runner.name for runner in runners]):
|
||||
print(f"Candidate {candidate} was not in runners list, simply delete it")
|
||||
print(
|
||||
f"Candidate {candidate} was not in runners list, simply delete it"
|
||||
)
|
||||
instances_to_kill.append(candidate)
|
||||
|
||||
for candidate in candidates:
|
||||
@ -183,57 +223,76 @@ def main(github_secret_key, github_app_id, event):
|
||||
for runner in runners:
|
||||
if runner.name == candidate:
|
||||
if not runner.busy:
|
||||
print(f"Runner {runner.name} is not busy and can be deleted from AV {zone}")
|
||||
print(
|
||||
f"Runner {runner.name} is not busy and can be deleted from AV {zone}"
|
||||
)
|
||||
delete_for_av.append(runner)
|
||||
else:
|
||||
print(f"Runner {runner.name} is busy, not going to delete it")
|
||||
break
|
||||
|
||||
if len(delete_for_av) < num_to_kill:
|
||||
print(f"Checked all candidates for av {zone}, get to delete {len(delete_for_av)}, but still cannot get required {num_to_kill}")
|
||||
print(
|
||||
f"Checked all candidates for av {zone}, get to delete {len(delete_for_av)}, but still cannot get required {num_to_kill}"
|
||||
)
|
||||
to_delete_runners += delete_for_av
|
||||
|
||||
print("Got instances to kill: ", ', '.join(instances_to_kill))
|
||||
print("Going to delete runners:", ', '.join([runner.name for runner in to_delete_runners]))
|
||||
print("Got instances to kill: ", ", ".join(instances_to_kill))
|
||||
print(
|
||||
"Going to delete runners:",
|
||||
", ".join([runner.name for runner in to_delete_runners]),
|
||||
)
|
||||
for runner in to_delete_runners:
|
||||
if delete_runner(access_token, runner):
|
||||
print(f"Runner with name {runner.name} and id {runner.id} successfuly deleted from github")
|
||||
print(
|
||||
f"Runner with name {runner.name} and id {runner.id} successfuly deleted from github"
|
||||
)
|
||||
instances_to_kill.append(runner.name)
|
||||
else:
|
||||
print(f"Cannot delete {runner.name} from github")
|
||||
|
||||
## push metrics
|
||||
#runners = list_runners(access_token)
|
||||
#push_metrics_to_cloudwatch(runners, 'RunnersMetrics')
|
||||
# runners = list_runners(access_token)
|
||||
# push_metrics_to_cloudwatch(runners, 'RunnersMetrics')
|
||||
|
||||
response = {
|
||||
"InstanceIDs": instances_to_kill
|
||||
}
|
||||
response = {"InstanceIDs": instances_to_kill}
|
||||
print(response)
|
||||
return response
|
||||
|
||||
|
||||
def handler(event, context):
|
||||
private_key, app_id = get_key_and_app_from_aws()
|
||||
return main(private_key, app_id, event)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description='Get list of runners and their states')
|
||||
parser.add_argument('-p', '--private-key-path', help='Path to file with private key')
|
||||
parser.add_argument('-k', '--private-key', help='Private key')
|
||||
parser.add_argument('-a', '--app-id', type=int, help='GitHub application ID', required=True)
|
||||
parser = argparse.ArgumentParser(description="Get list of runners and their states")
|
||||
parser.add_argument(
|
||||
"-p", "--private-key-path", help="Path to file with private key"
|
||||
)
|
||||
parser.add_argument("-k", "--private-key", help="Private key")
|
||||
parser.add_argument(
|
||||
"-a", "--app-id", type=int, help="GitHub application ID", required=True
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.private_key_path and not args.private_key:
|
||||
print("Either --private-key-path or --private-key must be specified", file=sys.stderr)
|
||||
print(
|
||||
"Either --private-key-path or --private-key must be specified",
|
||||
file=sys.stderr,
|
||||
)
|
||||
|
||||
if args.private_key_path and args.private_key:
|
||||
print("Either --private-key-path or --private-key must be specified", file=sys.stderr)
|
||||
print(
|
||||
"Either --private-key-path or --private-key must be specified",
|
||||
file=sys.stderr,
|
||||
)
|
||||
|
||||
if args.private_key:
|
||||
private_key = args.private_key
|
||||
else:
|
||||
with open(args.private_key_path, 'r') as key_file:
|
||||
with open(args.private_key_path, "r") as key_file:
|
||||
private_key = key_file.read()
|
||||
|
||||
sample_event = {
|
||||
@ -243,41 +302,41 @@ if __name__ == "__main__":
|
||||
{
|
||||
"AvailabilityZone": "us-east-1b",
|
||||
"Capacity": 1,
|
||||
"InstanceMarketOption": "OnDemand"
|
||||
"InstanceMarketOption": "OnDemand",
|
||||
},
|
||||
{
|
||||
"AvailabilityZone": "us-east-1c",
|
||||
"Capacity": 2,
|
||||
"InstanceMarketOption": "OnDemand"
|
||||
}
|
||||
"InstanceMarketOption": "OnDemand",
|
||||
},
|
||||
],
|
||||
"Instances": [
|
||||
{
|
||||
"AvailabilityZone": "us-east-1b",
|
||||
"InstanceId": "i-08d0b3c1a137e02a5",
|
||||
"InstanceType": "t2.nano",
|
||||
"InstanceMarketOption": "OnDemand"
|
||||
"InstanceMarketOption": "OnDemand",
|
||||
},
|
||||
{
|
||||
"AvailabilityZone": "us-east-1c",
|
||||
"InstanceId": "ip-172-31-45-253.eu-west-1.compute.internal",
|
||||
"InstanceType": "t2.nano",
|
||||
"InstanceMarketOption": "OnDemand"
|
||||
"InstanceMarketOption": "OnDemand",
|
||||
},
|
||||
{
|
||||
"AvailabilityZone": "us-east-1c",
|
||||
"InstanceId": "ip-172-31-27-227.eu-west-1.compute.internal",
|
||||
"InstanceType": "t2.nano",
|
||||
"InstanceMarketOption": "OnDemand"
|
||||
"InstanceMarketOption": "OnDemand",
|
||||
},
|
||||
{
|
||||
"AvailabilityZone": "us-east-1c",
|
||||
"InstanceId": "ip-172-31-45-253.eu-west-1.compute.internal",
|
||||
"InstanceType": "t2.nano",
|
||||
"InstanceMarketOption": "OnDemand"
|
||||
}
|
||||
"InstanceMarketOption": "OnDemand",
|
||||
},
|
||||
],
|
||||
"Cause": "SCALE_IN"
|
||||
"Cause": "SCALE_IN",
|
||||
}
|
||||
|
||||
main(private_key, args.app_id, sample_event)
|
||||
|
@ -7,6 +7,7 @@ import sys
|
||||
import json
|
||||
import time
|
||||
|
||||
|
||||
def get_installation_id(jwt_token):
|
||||
headers = {
|
||||
"Authorization": f"Bearer {jwt_token}",
|
||||
@ -15,40 +16,48 @@ def get_installation_id(jwt_token):
|
||||
response = requests.get("https://api.github.com/app/installations", headers=headers)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
return data[0]['id']
|
||||
return data[0]["id"]
|
||||
|
||||
|
||||
def get_access_token(jwt_token, installation_id):
|
||||
headers = {
|
||||
"Authorization": f"Bearer {jwt_token}",
|
||||
"Accept": "application/vnd.github.v3+json",
|
||||
}
|
||||
response = requests.post(f"https://api.github.com/app/installations/{installation_id}/access_tokens", headers=headers)
|
||||
response = requests.post(
|
||||
f"https://api.github.com/app/installations/{installation_id}/access_tokens",
|
||||
headers=headers,
|
||||
)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
return data['token']
|
||||
return data["token"]
|
||||
|
||||
|
||||
def get_runner_registration_token(access_token):
|
||||
headers = {
|
||||
"Authorization": f"token {access_token}",
|
||||
"Accept": "application/vnd.github.v3+json",
|
||||
}
|
||||
response = requests.post("https://api.github.com/orgs/ClickHouse/actions/runners/registration-token", headers=headers)
|
||||
response = requests.post(
|
||||
"https://api.github.com/orgs/ClickHouse/actions/runners/registration-token",
|
||||
headers=headers,
|
||||
)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
return data['token']
|
||||
return data["token"]
|
||||
|
||||
|
||||
def get_key_and_app_from_aws():
|
||||
import boto3
|
||||
|
||||
secret_name = "clickhouse_github_secret_key"
|
||||
session = boto3.session.Session()
|
||||
client = session.client(
|
||||
service_name='secretsmanager',
|
||||
service_name="secretsmanager",
|
||||
)
|
||||
get_secret_value_response = client.get_secret_value(
|
||||
SecretId=secret_name
|
||||
)
|
||||
data = json.loads(get_secret_value_response['SecretString'])
|
||||
return data['clickhouse-app-key'], int(data['clickhouse-app-id'])
|
||||
get_secret_value_response = client.get_secret_value(SecretId=secret_name)
|
||||
data = json.loads(get_secret_value_response["SecretString"])
|
||||
return data["clickhouse-app-key"], int(data["clickhouse-app-id"])
|
||||
|
||||
|
||||
def main(github_secret_key, github_app_id, push_to_ssm, ssm_parameter_name):
|
||||
@ -67,40 +76,65 @@ def main(github_secret_key, github_app_id, push_to_ssm, ssm_parameter_name):
|
||||
import boto3
|
||||
|
||||
print("Trying to put params into ssm manager")
|
||||
client = boto3.client('ssm')
|
||||
client = boto3.client("ssm")
|
||||
client.put_parameter(
|
||||
Name=ssm_parameter_name,
|
||||
Value=runner_registration_token,
|
||||
Type='SecureString',
|
||||
Overwrite=True)
|
||||
Type="SecureString",
|
||||
Overwrite=True,
|
||||
)
|
||||
else:
|
||||
print("Not push token to AWS Parameter Store, just print:", runner_registration_token)
|
||||
print(
|
||||
"Not push token to AWS Parameter Store, just print:",
|
||||
runner_registration_token,
|
||||
)
|
||||
|
||||
|
||||
def handler(event, context):
|
||||
private_key, app_id = get_key_and_app_from_aws()
|
||||
main(private_key, app_id, True, 'github_runner_registration_token')
|
||||
main(private_key, app_id, True, "github_runner_registration_token")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description='Get new token from github to add runners')
|
||||
parser.add_argument('-p', '--private-key-path', help='Path to file with private key')
|
||||
parser.add_argument('-k', '--private-key', help='Private key')
|
||||
parser.add_argument('-a', '--app-id', type=int, help='GitHub application ID', required=True)
|
||||
parser.add_argument('--push-to-ssm', action='store_true', help='Store received token in parameter store')
|
||||
parser.add_argument('--ssm-parameter-name', default='github_runner_registration_token', help='AWS paramater store parameter name')
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Get new token from github to add runners"
|
||||
)
|
||||
parser.add_argument(
|
||||
"-p", "--private-key-path", help="Path to file with private key"
|
||||
)
|
||||
parser.add_argument("-k", "--private-key", help="Private key")
|
||||
parser.add_argument(
|
||||
"-a", "--app-id", type=int, help="GitHub application ID", required=True
|
||||
)
|
||||
parser.add_argument(
|
||||
"--push-to-ssm",
|
||||
action="store_true",
|
||||
help="Store received token in parameter store",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--ssm-parameter-name",
|
||||
default="github_runner_registration_token",
|
||||
help="AWS paramater store parameter name",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.private_key_path and not args.private_key:
|
||||
print("Either --private-key-path or --private-key must be specified", file=sys.stderr)
|
||||
print(
|
||||
"Either --private-key-path or --private-key must be specified",
|
||||
file=sys.stderr,
|
||||
)
|
||||
|
||||
if args.private_key_path and args.private_key:
|
||||
print("Either --private-key-path or --private-key must be specified", file=sys.stderr)
|
||||
print(
|
||||
"Either --private-key-path or --private-key must be specified",
|
||||
file=sys.stderr,
|
||||
)
|
||||
|
||||
if args.private_key:
|
||||
private_key = args.private_key
|
||||
else:
|
||||
with open(args.private_key_path, 'r') as key_file:
|
||||
with open(args.private_key_path, "r") as key_file:
|
||||
private_key = key_file.read()
|
||||
|
||||
main(private_key, args.app_id, args.push_to_ssm, args.ssm_parameter_name)
|
||||
|
@ -15,32 +15,38 @@ from build_download_helper import download_unit_tests
|
||||
from upload_result_helper import upload_results
|
||||
from docker_pull_helper import get_image_with_version
|
||||
from commit_status_helper import post_commit_status
|
||||
from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse
|
||||
from clickhouse_helper import (
|
||||
ClickHouseHelper,
|
||||
mark_flaky_tests,
|
||||
prepare_tests_results_for_clickhouse,
|
||||
)
|
||||
from stopwatch import Stopwatch
|
||||
from rerun_helper import RerunHelper
|
||||
from tee_popen import TeePopen
|
||||
|
||||
|
||||
IMAGE_NAME = 'clickhouse/unit-test'
|
||||
IMAGE_NAME = "clickhouse/unit-test"
|
||||
|
||||
|
||||
def get_test_name(line):
|
||||
elements = reversed(line.split(' '))
|
||||
elements = reversed(line.split(" "))
|
||||
for element in elements:
|
||||
if '(' not in element and ')' not in element:
|
||||
if "(" not in element and ")" not in element:
|
||||
return element
|
||||
raise Exception(f"No test name in line '{line}'")
|
||||
|
||||
|
||||
def process_result(result_folder):
|
||||
OK_SIGN = 'OK ]'
|
||||
FAILED_SIGN = 'FAILED ]'
|
||||
SEGFAULT = 'Segmentation fault'
|
||||
SIGNAL = 'received signal SIG'
|
||||
PASSED = 'PASSED'
|
||||
OK_SIGN = "OK ]"
|
||||
FAILED_SIGN = "FAILED ]"
|
||||
SEGFAULT = "Segmentation fault"
|
||||
SIGNAL = "received signal SIG"
|
||||
PASSED = "PASSED"
|
||||
|
||||
summary = []
|
||||
total_counter = 0
|
||||
failed_counter = 0
|
||||
result_log_path = f'{result_folder}/test_result.txt'
|
||||
result_log_path = f"{result_folder}/test_result.txt"
|
||||
if not os.path.exists(result_log_path):
|
||||
logging.info("No output log on path %s", result_log_path)
|
||||
return "error", "No output log", summary, []
|
||||
@ -48,7 +54,7 @@ def process_result(result_folder):
|
||||
status = "success"
|
||||
description = ""
|
||||
passed = False
|
||||
with open(result_log_path, 'r', encoding='utf-8') as test_result:
|
||||
with open(result_log_path, "r", encoding="utf-8") as test_result:
|
||||
for line in test_result:
|
||||
if OK_SIGN in line:
|
||||
logging.info("Found ok line: '%s'", line)
|
||||
@ -56,7 +62,7 @@ def process_result(result_folder):
|
||||
logging.info("Test name: '%s'", test_name)
|
||||
summary.append((test_name, "OK"))
|
||||
total_counter += 1
|
||||
elif FAILED_SIGN in line and 'listed below' not in line and 'ms)' in line:
|
||||
elif FAILED_SIGN in line and "listed below" not in line and "ms)" in line:
|
||||
logging.info("Found fail line: '%s'", line)
|
||||
test_name = get_test_name(line.strip())
|
||||
logging.info("Test name: '%s'", test_name)
|
||||
@ -85,7 +91,9 @@ def process_result(result_folder):
|
||||
status = "failure"
|
||||
|
||||
if not description:
|
||||
description += f"fail: {failed_counter}, passed: {total_counter - failed_counter}"
|
||||
description += (
|
||||
f"fail: {failed_counter}, passed: {total_counter - failed_counter}"
|
||||
)
|
||||
|
||||
return status, description, summary, [result_log_path]
|
||||
|
||||
@ -139,15 +147,30 @@ if __name__ == "__main__":
|
||||
|
||||
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
|
||||
|
||||
s3_helper = S3Helper('https://s3.amazonaws.com')
|
||||
s3_helper = S3Helper("https://s3.amazonaws.com")
|
||||
state, description, test_results, additional_logs = process_result(test_output)
|
||||
|
||||
ch_helper = ClickHouseHelper()
|
||||
mark_flaky_tests(ch_helper, check_name, test_results)
|
||||
|
||||
report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [run_log_path] + additional_logs, check_name)
|
||||
report_url = upload_results(
|
||||
s3_helper,
|
||||
pr_info.number,
|
||||
pr_info.sha,
|
||||
test_results,
|
||||
[run_log_path] + additional_logs,
|
||||
check_name,
|
||||
)
|
||||
print(f"::notice ::Report url: {report_url}")
|
||||
post_commit_status(gh, pr_info.sha, check_name, description, state, report_url)
|
||||
|
||||
prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name)
|
||||
prepared_events = prepare_tests_results_for_clickhouse(
|
||||
pr_info,
|
||||
test_results,
|
||||
state,
|
||||
stopwatch.duration_seconds,
|
||||
stopwatch.start_time_str,
|
||||
report_url,
|
||||
check_name,
|
||||
)
|
||||
ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
|
||||
|
@ -6,7 +6,9 @@ from env_helper import GITHUB_SERVER_URL, GITHUB_REPOSITORY, GITHUB_RUN_ID
|
||||
from report import ReportColorTheme, create_test_html_report
|
||||
|
||||
|
||||
def process_logs(s3_client, additional_logs, s3_path_prefix, test_results, with_raw_logs):
|
||||
def process_logs(
|
||||
s3_client, additional_logs, s3_path_prefix, test_results, with_raw_logs
|
||||
):
|
||||
processed_logs = {}
|
||||
# Firstly convert paths of logs from test_results to urls to s3.
|
||||
for test_result in test_results:
|
||||
@ -21,8 +23,8 @@ def process_logs(s3_client, additional_logs, s3_path_prefix, test_results, with_
|
||||
test_log_urls.append(processed_logs[log_path])
|
||||
elif log_path:
|
||||
url = s3_client.upload_test_report_to_s3(
|
||||
log_path,
|
||||
s3_path_prefix + "/" + os.path.basename(log_path))
|
||||
log_path, s3_path_prefix + "/" + os.path.basename(log_path)
|
||||
)
|
||||
test_log_urls.append(url)
|
||||
processed_logs[log_path] = url
|
||||
|
||||
@ -33,15 +35,29 @@ def process_logs(s3_client, additional_logs, s3_path_prefix, test_results, with_
|
||||
if log_path:
|
||||
additional_urls.append(
|
||||
s3_client.upload_test_report_to_s3(
|
||||
log_path,
|
||||
s3_path_prefix + "/" + os.path.basename(log_path)))
|
||||
log_path, s3_path_prefix + "/" + os.path.basename(log_path)
|
||||
)
|
||||
)
|
||||
|
||||
return additional_urls
|
||||
|
||||
|
||||
def upload_results(s3_client, pr_number, commit_sha, test_results, additional_files, check_name, with_raw_logs=True, statuscolors=None):
|
||||
s3_path_prefix = f"{pr_number}/{commit_sha}/" + check_name.lower().replace(' ', '_').replace('(', '_').replace(')', '_').replace(',', '_')
|
||||
additional_urls = process_logs(s3_client, additional_files, s3_path_prefix, test_results, with_raw_logs)
|
||||
def upload_results(
|
||||
s3_client,
|
||||
pr_number,
|
||||
commit_sha,
|
||||
test_results,
|
||||
additional_files,
|
||||
check_name,
|
||||
with_raw_logs=True,
|
||||
statuscolors=None,
|
||||
):
|
||||
s3_path_prefix = f"{pr_number}/{commit_sha}/" + check_name.lower().replace(
|
||||
" ", "_"
|
||||
).replace("(", "_").replace(")", "_").replace(",", "_")
|
||||
additional_urls = process_logs(
|
||||
s3_client, additional_files, s3_path_prefix, test_results, with_raw_logs
|
||||
)
|
||||
|
||||
branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commits/master"
|
||||
branch_name = "master"
|
||||
@ -58,14 +74,25 @@ def upload_results(s3_client, pr_number, commit_sha, test_results, additional_fi
|
||||
else:
|
||||
raw_log_url = task_url
|
||||
|
||||
statuscolors = ReportColorTheme.bugfixcheck if 'bugfix validate check' in check_name else None
|
||||
statuscolors = (
|
||||
ReportColorTheme.bugfixcheck if "bugfix validate check" in check_name else None
|
||||
)
|
||||
|
||||
html_report = create_test_html_report(check_name, test_results, raw_log_url,
|
||||
task_url, branch_url, branch_name, commit_url,
|
||||
additional_urls, with_raw_logs, statuscolors=statuscolors)
|
||||
with open('report.html', 'w', encoding='utf-8') as f:
|
||||
html_report = create_test_html_report(
|
||||
check_name,
|
||||
test_results,
|
||||
raw_log_url,
|
||||
task_url,
|
||||
branch_url,
|
||||
branch_name,
|
||||
commit_url,
|
||||
additional_urls,
|
||||
with_raw_logs,
|
||||
statuscolors=statuscolors,
|
||||
)
|
||||
with open("report.html", "w", encoding="utf-8") as f:
|
||||
f.write(html_report)
|
||||
|
||||
url = s3_client.upload_test_report_to_s3('report.html', s3_path_prefix + ".html")
|
||||
url = s3_client.upload_test_report_to_s3("report.html", s3_path_prefix + ".html")
|
||||
logging.info("Search result in url %s", url)
|
||||
return url
|
||||
|
@ -27,7 +27,7 @@ MAX_TIME_SECONDS = 3600
|
||||
MAX_TIME_IN_SANDBOX = 20 * 60 # 20 minutes
|
||||
TASK_TIMEOUT = 8 * 60 * 60 # 8 hours
|
||||
|
||||
NO_CHANGES_MSG = 'Nothing to run'
|
||||
NO_CHANGES_MSG = "Nothing to run"
|
||||
|
||||
|
||||
def stringhash(s):
|
||||
@ -209,7 +209,9 @@ class ClickhouseIntegrationTestsRunner:
|
||||
self.image_versions = self.params["docker_images_with_versions"]
|
||||
self.shuffle_groups = self.params["shuffle_test_groups"]
|
||||
self.flaky_check = "flaky check" in self.params["context_name"]
|
||||
self.bugfix_validate_check = "bugfix validate check" in self.params["context_name"]
|
||||
self.bugfix_validate_check = (
|
||||
"bugfix validate check" in self.params["context_name"]
|
||||
)
|
||||
# if use_tmpfs is not set we assume it to be true, otherwise check
|
||||
self.use_tmpfs = "use_tmpfs" not in self.params or self.params["use_tmpfs"]
|
||||
self.disable_net_host = (
|
||||
@ -780,7 +782,9 @@ class ClickhouseIntegrationTestsRunner:
|
||||
|
||||
def run_impl(self, repo_path, build_path):
|
||||
if self.flaky_check or self.bugfix_validate_check:
|
||||
return self.run_flaky_check(repo_path, build_path, should_fail=self.bugfix_validate_check)
|
||||
return self.run_flaky_check(
|
||||
repo_path, build_path, should_fail=self.bugfix_validate_check
|
||||
)
|
||||
|
||||
self._install_clickhouse(build_path)
|
||||
logging.info(
|
||||
|
@ -5,23 +5,34 @@ import os
|
||||
from helpers.test_tools import TSV
|
||||
from helpers.network import _NetworkManager
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True, scope="session")
|
||||
def cleanup_environment():
|
||||
try:
|
||||
if int(os.environ.get("PYTEST_CLEANUP_CONTAINERS", 0)) == 1:
|
||||
logging.debug(f"Cleaning all iptables rules")
|
||||
_NetworkManager.clean_all_user_iptables_rules()
|
||||
result = run_and_check(['docker ps | wc -l'], shell=True)
|
||||
result = run_and_check(["docker ps | wc -l"], shell=True)
|
||||
if int(result) > 1:
|
||||
if int(os.environ.get("PYTEST_CLEANUP_CONTAINERS", 0)) != 1:
|
||||
logging.warning(f"Docker containters({int(result)}) are running before tests run. They can be left from previous pytest run and cause test failures.\n"\
|
||||
"You can set env PYTEST_CLEANUP_CONTAINERS=1 or use runner with --cleanup-containers argument to enable automatic containers cleanup.")
|
||||
logging.warning(
|
||||
f"Docker containters({int(result)}) are running before tests run. They can be left from previous pytest run and cause test failures.\n"
|
||||
"You can set env PYTEST_CLEANUP_CONTAINERS=1 or use runner with --cleanup-containers argument to enable automatic containers cleanup."
|
||||
)
|
||||
else:
|
||||
logging.debug("Trying to kill unstopped containers...")
|
||||
run_and_check([f'docker kill $(docker container list --all --quiet)'], shell=True, nothrow=True)
|
||||
run_and_check([f'docker rm $docker container list --all --quiet)'], shell=True, nothrow=True)
|
||||
run_and_check(
|
||||
[f"docker kill $(docker container list --all --quiet)"],
|
||||
shell=True,
|
||||
nothrow=True,
|
||||
)
|
||||
run_and_check(
|
||||
[f"docker rm $docker container list --all --quiet)"],
|
||||
shell=True,
|
||||
nothrow=True,
|
||||
)
|
||||
logging.debug("Unstopped containers killed")
|
||||
r = run_and_check(['docker-compose', 'ps', '--services', '--all'])
|
||||
r = run_and_check(["docker-compose", "ps", "--services", "--all"])
|
||||
logging.debug(f"Docker ps before start:{r.stdout}")
|
||||
else:
|
||||
logging.debug(f"No running containers")
|
||||
@ -31,8 +42,14 @@ def cleanup_environment():
|
||||
|
||||
yield
|
||||
|
||||
|
||||
def pytest_addoption(parser):
|
||||
parser.addoption("--run-id", default="", help="run-id is used as postfix in _instances_{} directory")
|
||||
parser.addoption(
|
||||
"--run-id",
|
||||
default="",
|
||||
help="run-id is used as postfix in _instances_{} directory",
|
||||
)
|
||||
|
||||
|
||||
def pytest_configure(config):
|
||||
os.environ['INTEGRATION_TESTS_RUN_ID'] = config.option.run_id
|
||||
os.environ["INTEGRATION_TESTS_RUN_ID"] = config.option.run_id
|
||||
|
@ -6,79 +6,117 @@ from threading import Timer
|
||||
|
||||
|
||||
class Client:
|
||||
def __init__(self, host, port=9000, command='/usr/bin/clickhouse-client'):
|
||||
def __init__(self, host, port=9000, command="/usr/bin/clickhouse-client"):
|
||||
self.host = host
|
||||
self.port = port
|
||||
self.command = [command]
|
||||
|
||||
if os.path.basename(command) == 'clickhouse':
|
||||
self.command.append('client')
|
||||
if os.path.basename(command) == "clickhouse":
|
||||
self.command.append("client")
|
||||
|
||||
self.command += ['--host', self.host, '--port', str(self.port), '--stacktrace']
|
||||
self.command += ["--host", self.host, "--port", str(self.port), "--stacktrace"]
|
||||
|
||||
def query(self, sql,
|
||||
stdin=None,
|
||||
timeout=None,
|
||||
settings=None,
|
||||
user=None,
|
||||
password=None,
|
||||
database=None,
|
||||
ignore_error=False,
|
||||
query_id=None):
|
||||
return self.get_query_request(sql,
|
||||
stdin=stdin,
|
||||
timeout=timeout,
|
||||
settings=settings,
|
||||
user=user,
|
||||
password=password,
|
||||
database=database,
|
||||
ignore_error=ignore_error,
|
||||
query_id=query_id).get_answer()
|
||||
def query(
|
||||
self,
|
||||
sql,
|
||||
stdin=None,
|
||||
timeout=None,
|
||||
settings=None,
|
||||
user=None,
|
||||
password=None,
|
||||
database=None,
|
||||
ignore_error=False,
|
||||
query_id=None,
|
||||
):
|
||||
return self.get_query_request(
|
||||
sql,
|
||||
stdin=stdin,
|
||||
timeout=timeout,
|
||||
settings=settings,
|
||||
user=user,
|
||||
password=password,
|
||||
database=database,
|
||||
ignore_error=ignore_error,
|
||||
query_id=query_id,
|
||||
).get_answer()
|
||||
|
||||
def get_query_request(self, sql,
|
||||
stdin=None,
|
||||
timeout=None,
|
||||
settings=None,
|
||||
user=None,
|
||||
password=None,
|
||||
database=None,
|
||||
ignore_error=False,
|
||||
query_id=None):
|
||||
def get_query_request(
|
||||
self,
|
||||
sql,
|
||||
stdin=None,
|
||||
timeout=None,
|
||||
settings=None,
|
||||
user=None,
|
||||
password=None,
|
||||
database=None,
|
||||
ignore_error=False,
|
||||
query_id=None,
|
||||
):
|
||||
command = self.command[:]
|
||||
|
||||
if stdin is None:
|
||||
command += ['--multiquery', '--testmode']
|
||||
command += ["--multiquery", "--testmode"]
|
||||
stdin = sql
|
||||
else:
|
||||
command += ['--query', sql]
|
||||
command += ["--query", sql]
|
||||
|
||||
if settings is not None:
|
||||
for setting, value in settings.items():
|
||||
command += ['--' + setting, str(value)]
|
||||
command += ["--" + setting, str(value)]
|
||||
|
||||
if user is not None:
|
||||
command += ['--user', user]
|
||||
command += ["--user", user]
|
||||
|
||||
if password is not None:
|
||||
command += ['--password', password]
|
||||
command += ["--password", password]
|
||||
|
||||
if database is not None:
|
||||
command += ['--database', database]
|
||||
command += ["--database", database]
|
||||
|
||||
if query_id is not None:
|
||||
command += ['--query_id', query_id]
|
||||
command += ["--query_id", query_id]
|
||||
|
||||
return CommandRequest(command, stdin, timeout, ignore_error)
|
||||
|
||||
def query_and_get_error(self, sql, stdin=None, timeout=None, settings=None, user=None, password=None,
|
||||
database=None):
|
||||
return self.get_query_request(sql, stdin=stdin, timeout=timeout, settings=settings, user=user,
|
||||
password=password, database=database).get_error()
|
||||
def query_and_get_error(
|
||||
self,
|
||||
sql,
|
||||
stdin=None,
|
||||
timeout=None,
|
||||
settings=None,
|
||||
user=None,
|
||||
password=None,
|
||||
database=None,
|
||||
):
|
||||
return self.get_query_request(
|
||||
sql,
|
||||
stdin=stdin,
|
||||
timeout=timeout,
|
||||
settings=settings,
|
||||
user=user,
|
||||
password=password,
|
||||
database=database,
|
||||
).get_error()
|
||||
|
||||
def query_and_get_answer_with_error(self, sql, stdin=None, timeout=None, settings=None, user=None, password=None,
|
||||
database=None):
|
||||
return self.get_query_request(sql, stdin=stdin, timeout=timeout, settings=settings, user=user,
|
||||
password=password, database=database).get_answer_and_error()
|
||||
def query_and_get_answer_with_error(
|
||||
self,
|
||||
sql,
|
||||
stdin=None,
|
||||
timeout=None,
|
||||
settings=None,
|
||||
user=None,
|
||||
password=None,
|
||||
database=None,
|
||||
):
|
||||
return self.get_query_request(
|
||||
sql,
|
||||
stdin=stdin,
|
||||
timeout=timeout,
|
||||
settings=settings,
|
||||
user=user,
|
||||
password=password,
|
||||
database=database,
|
||||
).get_answer_and_error()
|
||||
|
||||
|
||||
class QueryTimeoutExceedException(Exception):
|
||||
@ -95,7 +133,7 @@ class QueryRuntimeException(Exception):
|
||||
class CommandRequest:
|
||||
def __init__(self, command, stdin=None, timeout=None, ignore_error=False):
|
||||
# Write data to tmp file to avoid PIPEs and execution blocking
|
||||
stdin_file = tempfile.TemporaryFile(mode='w+')
|
||||
stdin_file = tempfile.TemporaryFile(mode="w+")
|
||||
stdin_file.write(stdin)
|
||||
stdin_file.seek(0)
|
||||
self.stdout_file = tempfile.TemporaryFile()
|
||||
@ -108,11 +146,19 @@ class CommandRequest:
|
||||
# can print some debug information there
|
||||
env = {}
|
||||
env["TSAN_OPTIONS"] = "verbosity=0"
|
||||
self.process = sp.Popen(command, stdin=stdin_file, stdout=self.stdout_file, stderr=self.stderr_file, env=env, universal_newlines=True)
|
||||
self.process = sp.Popen(
|
||||
command,
|
||||
stdin=stdin_file,
|
||||
stdout=self.stdout_file,
|
||||
stderr=self.stderr_file,
|
||||
env=env,
|
||||
universal_newlines=True,
|
||||
)
|
||||
|
||||
self.timer = None
|
||||
self.process_finished_before_timeout = True
|
||||
if timeout is not None:
|
||||
|
||||
def kill_process():
|
||||
if self.process.poll() is None:
|
||||
self.process_finished_before_timeout = False
|
||||
@ -126,16 +172,25 @@ class CommandRequest:
|
||||
self.stdout_file.seek(0)
|
||||
self.stderr_file.seek(0)
|
||||
|
||||
stdout = self.stdout_file.read().decode('utf-8', errors='replace')
|
||||
stderr = self.stderr_file.read().decode('utf-8', errors='replace')
|
||||
stdout = self.stdout_file.read().decode("utf-8", errors="replace")
|
||||
stderr = self.stderr_file.read().decode("utf-8", errors="replace")
|
||||
|
||||
if self.timer is not None and not self.process_finished_before_timeout and not self.ignore_error:
|
||||
if (
|
||||
self.timer is not None
|
||||
and not self.process_finished_before_timeout
|
||||
and not self.ignore_error
|
||||
):
|
||||
logging.debug(f"Timed out. Last stdout:{stdout}, stderr:{stderr}")
|
||||
raise QueryTimeoutExceedException('Client timed out!')
|
||||
raise QueryTimeoutExceedException("Client timed out!")
|
||||
|
||||
if (self.process.returncode != 0 or stderr) and not self.ignore_error:
|
||||
raise QueryRuntimeException(
|
||||
'Client failed! Return code: {}, stderr: {}'.format(self.process.returncode, stderr), self.process.returncode, stderr)
|
||||
"Client failed! Return code: {}, stderr: {}".format(
|
||||
self.process.returncode, stderr
|
||||
),
|
||||
self.process.returncode,
|
||||
stderr,
|
||||
)
|
||||
|
||||
return stdout
|
||||
|
||||
@ -144,14 +199,22 @@ class CommandRequest:
|
||||
self.stdout_file.seek(0)
|
||||
self.stderr_file.seek(0)
|
||||
|
||||
stdout = self.stdout_file.read().decode('utf-8', errors='replace')
|
||||
stderr = self.stderr_file.read().decode('utf-8', errors='replace')
|
||||
stdout = self.stdout_file.read().decode("utf-8", errors="replace")
|
||||
stderr = self.stderr_file.read().decode("utf-8", errors="replace")
|
||||
|
||||
if self.timer is not None and not self.process_finished_before_timeout and not self.ignore_error:
|
||||
raise QueryTimeoutExceedException('Client timed out!')
|
||||
if (
|
||||
self.timer is not None
|
||||
and not self.process_finished_before_timeout
|
||||
and not self.ignore_error
|
||||
):
|
||||
raise QueryTimeoutExceedException("Client timed out!")
|
||||
|
||||
if (self.process.returncode == 0):
|
||||
raise QueryRuntimeException('Client expected to be failed but succeeded! stdout: {}'.format(stdout), self.process.returncode, stderr)
|
||||
if self.process.returncode == 0:
|
||||
raise QueryRuntimeException(
|
||||
"Client expected to be failed but succeeded! stdout: {}".format(stdout),
|
||||
self.process.returncode,
|
||||
stderr,
|
||||
)
|
||||
|
||||
return stderr
|
||||
|
||||
@ -160,10 +223,14 @@ class CommandRequest:
|
||||
self.stdout_file.seek(0)
|
||||
self.stderr_file.seek(0)
|
||||
|
||||
stdout = self.stdout_file.read().decode('utf-8', errors='replace')
|
||||
stderr = self.stderr_file.read().decode('utf-8', errors='replace')
|
||||
stdout = self.stdout_file.read().decode("utf-8", errors="replace")
|
||||
stderr = self.stderr_file.read().decode("utf-8", errors="replace")
|
||||
|
||||
if self.timer is not None and not self.process_finished_before_timeout and not self.ignore_error:
|
||||
raise QueryTimeoutExceedException('Client timed out!')
|
||||
if (
|
||||
self.timer is not None
|
||||
and not self.process_finished_before_timeout
|
||||
and not self.ignore_error
|
||||
):
|
||||
raise QueryTimeoutExceedException("Client timed out!")
|
||||
|
||||
return (stdout, stderr)
|
||||
|
File diff suppressed because it is too large
@ -1,14 +1,29 @@
def corrupt_part_data_on_disk(node, table, part_name):
part_path = node.query("SELECT path FROM system.parts WHERE table = '{}' and name = '{}'"
.format(table, part_name)).strip()
part_path = node.query(
"SELECT path FROM system.parts WHERE table = '{}' and name = '{}'".format(
table, part_name
)
).strip()

corrupt_part_data_by_path(node, part_path)


def corrupt_part_data_by_path(node, part_path):
print("Corrupting part", part_path, "at", node.name)
print("Will corrupt: ",
node.exec_in_container(['bash', '-c', 'cd {p} && ls *.bin | head -n 1'.format(p=part_path)]))
print(
"Will corrupt: ",
node.exec_in_container(
["bash", "-c", "cd {p} && ls *.bin | head -n 1".format(p=part_path)]
),
)

node.exec_in_container(['bash', '-c',
'cd {p} && ls *.bin | head -n 1 | xargs -I{{}} sh -c \'echo "1" >> $1\' -- {{}}'.format(
p=part_path)], privileged=True)
node.exec_in_container(
[
"bash",
"-c",
"cd {p} && ls *.bin | head -n 1 | xargs -I{{}} sh -c 'echo \"1\" >> $1' -- {{}}".format(
p=part_path
),
],
privileged=True,
)

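# The helper above corrupts a part by appending a byte to the first *.bin file
# it finds inside the container. A hedged local-filesystem sketch of the same
# idea; the directory layout is an assumption and no ClickHouse node is
# involved.
import glob
import os


def corrupt_first_bin_file(part_path):
    bin_files = sorted(glob.glob(os.path.join(part_path, "*.bin")))
    if not bin_files:
        raise FileNotFoundError("No .bin files under {}".format(part_path))
    with open(bin_files[0], "ab") as f:
        # Appending a single byte is enough to break the part's checksums.
        f.write(b"1")
    return bin_files[0]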
@ -4,18 +4,18 @@ import copy
|
||||
|
||||
class Layout(object):
|
||||
LAYOUTS_STR_DICT = {
|
||||
'flat': '<flat/>',
|
||||
'hashed': '<hashed/>',
|
||||
'cache': '<cache><size_in_cells>128</size_in_cells></cache>',
|
||||
'ssd_cache': '<ssd_cache><path>/etc/clickhouse-server/dictionaries/all</path></ssd_cache>',
|
||||
'complex_key_hashed': '<complex_key_hashed/>',
|
||||
'complex_key_hashed_one_key': '<complex_key_hashed/>',
|
||||
'complex_key_hashed_two_keys': '<complex_key_hashed/>',
|
||||
'complex_key_cache': '<complex_key_cache><size_in_cells>128</size_in_cells></complex_key_cache>',
|
||||
'complex_key_ssd_cache': '<complex_key_ssd_cache><path>/etc/clickhouse-server/dictionaries/all</path></complex_key_ssd_cache>',
|
||||
'range_hashed': '<range_hashed/>',
|
||||
'direct': '<direct/>',
|
||||
'complex_key_direct': '<complex_key_direct/>'
|
||||
"flat": "<flat/>",
|
||||
"hashed": "<hashed/>",
|
||||
"cache": "<cache><size_in_cells>128</size_in_cells></cache>",
|
||||
"ssd_cache": "<ssd_cache><path>/etc/clickhouse-server/dictionaries/all</path></ssd_cache>",
|
||||
"complex_key_hashed": "<complex_key_hashed/>",
|
||||
"complex_key_hashed_one_key": "<complex_key_hashed/>",
|
||||
"complex_key_hashed_two_keys": "<complex_key_hashed/>",
|
||||
"complex_key_cache": "<complex_key_cache><size_in_cells>128</size_in_cells></complex_key_cache>",
|
||||
"complex_key_ssd_cache": "<complex_key_ssd_cache><path>/etc/clickhouse-server/dictionaries/all</path></complex_key_ssd_cache>",
|
||||
"range_hashed": "<range_hashed/>",
|
||||
"direct": "<direct/>",
|
||||
"complex_key_direct": "<complex_key_direct/>",
|
||||
}
|
||||
|
||||
def __init__(self, name):
|
||||
@ -23,14 +23,14 @@ class Layout(object):
|
||||
self.is_complex = False
|
||||
self.is_simple = False
|
||||
self.is_ranged = False
|
||||
if self.name.startswith('complex'):
|
||||
self.layout_type = 'complex'
|
||||
if self.name.startswith("complex"):
|
||||
self.layout_type = "complex"
|
||||
self.is_complex = True
|
||||
elif name.startswith('range'):
|
||||
self.layout_type = 'ranged'
|
||||
elif name.startswith("range"):
|
||||
self.layout_type = "ranged"
|
||||
self.is_ranged = True
|
||||
else:
|
||||
self.layout_type = 'simple'
|
||||
self.layout_type = "simple"
|
||||
self.is_simple = True
|
||||
|
||||
def get_str(self):
|
||||
@ -38,8 +38,8 @@ class Layout(object):
|
||||
|
||||
def get_key_block_name(self):
|
||||
if self.is_complex:
|
||||
return 'key'
|
||||
return 'id'
|
||||
return "key"
|
||||
return "id"
|
||||
|
||||
|
||||
class Row(object):
|
||||
@ -59,8 +59,17 @@ class Row(object):
|
||||
|
||||
|
||||
class Field(object):
|
||||
def __init__(self, name, field_type, is_key=False, is_range_key=False, default=None, hierarchical=False,
|
||||
range_hash_type=None, default_value_for_get=None):
|
||||
def __init__(
|
||||
self,
|
||||
name,
|
||||
field_type,
|
||||
is_key=False,
|
||||
is_range_key=False,
|
||||
default=None,
|
||||
hierarchical=False,
|
||||
range_hash_type=None,
|
||||
default_value_for_get=None,
|
||||
):
|
||||
self.name = name
|
||||
self.field_type = field_type
|
||||
self.is_key = is_key
|
||||
@ -72,30 +81,32 @@ class Field(object):
|
||||
self.default_value_for_get = default_value_for_get
|
||||
|
||||
def get_attribute_str(self):
|
||||
return '''
|
||||
return """
|
||||
<attribute>
|
||||
<name>{name}</name>
|
||||
<type>{field_type}</type>
|
||||
<null_value>{default}</null_value>
|
||||
<hierarchical>{hierarchical}</hierarchical>
|
||||
</attribute>'''.format(
|
||||
</attribute>""".format(
|
||||
name=self.name,
|
||||
field_type=self.field_type,
|
||||
default=self.default if self.default else '',
|
||||
hierarchical='true' if self.hierarchical else 'false',
|
||||
default=self.default if self.default else "",
|
||||
hierarchical="true" if self.hierarchical else "false",
|
||||
)
|
||||
|
||||
def get_simple_index_str(self):
|
||||
return '<name>{name}</name>'.format(name=self.name)
|
||||
return "<name>{name}</name>".format(name=self.name)
|
||||
|
||||
def get_range_hash_str(self):
|
||||
if not self.range_hash_type:
|
||||
raise Exception("Field {} is not range hashed".format(self.name))
|
||||
return '''
|
||||
return """
|
||||
<range_{type}>
|
||||
<name>{name}</name>
|
||||
</range_{type}>
|
||||
'''.format(type=self.range_hash_type, name=self.name)
|
||||
""".format(
|
||||
type=self.range_hash_type, name=self.name
|
||||
)
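
# Illustrative sketch, not part of this commit: the <attribute> block rendered by
# Field.get_attribute_str() above, re-implemented standalone. The example field name
# and default are made up.
def attribute_xml(name, field_type, default=None, hierarchical=False):
    return """
    <attribute>
        <name>{name}</name>
        <type>{field_type}</type>
        <null_value>{default}</null_value>
        <hierarchical>{hierarchical}</hierarchical>
    </attribute>""".format(
        name=name,
        field_type=field_type,
        default=default if default else "",
        hierarchical="true" if hierarchical else "false",
    )

print(attribute_xml("Int32_", "Int32", default=-77))
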
|
||||
|
||||
|
||||
class DictionaryStructure(object):
|
||||
@ -125,9 +136,14 @@ class DictionaryStructure(object):
|
||||
|
||||
if not self.layout.is_complex and len(self.keys) > 1:
|
||||
raise Exception(
|
||||
"More than one key {} field in non complex layout {}".format(len(self.keys), self.layout.name))
|
||||
"More than one key {} field in non complex layout {}".format(
|
||||
len(self.keys), self.layout.name
|
||||
)
|
||||
)
|
||||
|
||||
if self.layout.is_ranged and (not self.range_key or len(self.range_fields) != 2):
|
||||
if self.layout.is_ranged and (
|
||||
not self.range_key or len(self.range_fields) != 2
|
||||
):
|
||||
raise Exception("Inconsistent configuration of ranged dictionary")
|
||||
|
||||
def get_structure_str(self):
|
||||
@ -148,7 +164,7 @@ class DictionaryStructure(object):
|
||||
for range_field in self.range_fields:
|
||||
ranged_strs.append(range_field.get_range_hash_str())
|
||||
|
||||
return '''
|
||||
return """
|
||||
<layout>
|
||||
{layout_str}
|
||||
</layout>
|
||||
@ -158,12 +174,12 @@ class DictionaryStructure(object):
|
||||
</{key_block_name}>
|
||||
{range_strs}
|
||||
{attributes_str}
|
||||
</structure>'''.format(
|
||||
</structure>""".format(
|
||||
layout_str=self.layout.get_str(),
|
||||
key_block_name=self.layout.get_key_block_name(),
|
||||
key_str='\n'.join(key_strs),
|
||||
attributes_str='\n'.join(fields_strs),
|
||||
range_strs='\n'.join(ranged_strs),
|
||||
key_str="\n".join(key_strs),
|
||||
attributes_str="\n".join(fields_strs),
|
||||
range_strs="\n".join(ranged_strs),
|
||||
)
|
||||
|
||||
def get_ordered_names(self):
|
||||
@ -179,15 +195,19 @@ class DictionaryStructure(object):
|
||||
def get_all_fields(self):
|
||||
return self.keys + self.range_fields + self.ordinary_fields
|
||||
|
||||
def _get_dict_get_common_expression(self, dict_name, field, row, or_default, with_type, has):
|
||||
def _get_dict_get_common_expression(
|
||||
self, dict_name, field, row, or_default, with_type, has
|
||||
):
|
||||
if field in self.keys:
|
||||
raise Exception("Trying to receive key field {} from dictionary".format(field.name))
|
||||
raise Exception(
|
||||
"Trying to receive key field {} from dictionary".format(field.name)
|
||||
)
|
||||
|
||||
if not self.layout.is_complex:
|
||||
if not or_default:
|
||||
key_expr = ', toUInt64({})'.format(row.data[self.keys[0].name])
|
||||
key_expr = ", toUInt64({})".format(row.data[self.keys[0].name])
|
||||
else:
|
||||
key_expr = ', toUInt64({})'.format(self.keys[0].default_value_for_get)
|
||||
key_expr = ", toUInt64({})".format(self.keys[0].default_value_for_get)
|
||||
else:
|
||||
key_exprs_strs = []
|
||||
for key in self.keys:
|
||||
@ -197,48 +217,57 @@ class DictionaryStructure(object):
|
||||
val = key.default_value_for_get
|
||||
if isinstance(val, str):
|
||||
val = "'" + val + "'"
|
||||
key_exprs_strs.append('to{type}({value})'.format(type=key.field_type, value=val))
|
||||
key_expr = ', tuple(' + ','.join(key_exprs_strs) + ')'
|
||||
key_exprs_strs.append(
|
||||
"to{type}({value})".format(type=key.field_type, value=val)
|
||||
)
|
||||
key_expr = ", tuple(" + ",".join(key_exprs_strs) + ")"
|
||||
|
||||
date_expr = ''
|
||||
date_expr = ""
|
||||
if self.layout.is_ranged:
|
||||
val = row.data[self.range_key.name]
|
||||
if isinstance(val, str):
|
||||
val = "'" + val + "'"
|
||||
val = "to{type}({val})".format(type=self.range_key.field_type, val=val)
|
||||
|
||||
date_expr = ', ' + val
|
||||
date_expr = ", " + val
|
||||
|
||||
if or_default:
|
||||
raise Exception("Can create 'dictGetOrDefault' query for ranged dictionary")
|
||||
raise Exception(
|
||||
"Can create 'dictGetOrDefault' query for ranged dictionary"
|
||||
)
|
||||
|
||||
if or_default:
|
||||
or_default_expr = 'OrDefault'
|
||||
or_default_expr = "OrDefault"
|
||||
if field.default_value_for_get is None:
|
||||
raise Exception(
|
||||
"Can create 'dictGetOrDefault' query for field {} without default_value_for_get".format(field.name))
|
||||
"Can create 'dictGetOrDefault' query for field {} without default_value_for_get".format(
|
||||
field.name
|
||||
)
|
||||
)
|
||||
|
||||
val = field.default_value_for_get
|
||||
if isinstance(val, str):
|
||||
val = "'" + val + "'"
|
||||
default_value_for_get = ', to{type}({value})'.format(type=field.field_type, value=val)
|
||||
default_value_for_get = ", to{type}({value})".format(
|
||||
type=field.field_type, value=val
|
||||
)
|
||||
else:
|
||||
or_default_expr = ''
|
||||
default_value_for_get = ''
|
||||
or_default_expr = ""
|
||||
default_value_for_get = ""
|
||||
|
||||
if with_type:
|
||||
field_type = field.field_type
|
||||
else:
|
||||
field_type = ''
|
||||
field_type = ""
|
||||
|
||||
field_name = ", '" + field.name + "'"
|
||||
if has:
|
||||
what = "Has"
|
||||
field_type = ''
|
||||
or_default = ''
|
||||
field_name = ''
|
||||
date_expr = ''
|
||||
def_for_get = ''
|
||||
field_type = ""
|
||||
or_default = ""
|
||||
field_name = ""
|
||||
date_expr = ""
|
||||
def_for_get = ""
|
||||
else:
|
||||
what = "Get"
|
||||
|
||||
@ -255,28 +284,38 @@ class DictionaryStructure(object):
|
||||
|
||||
def get_get_expressions(self, dict_name, field, row):
|
||||
return [
|
||||
self._get_dict_get_common_expression(dict_name, field, row, or_default=False, with_type=False, has=False),
|
||||
self._get_dict_get_common_expression(dict_name, field, row, or_default=False, with_type=True, has=False),
|
||||
self._get_dict_get_common_expression(
|
||||
dict_name, field, row, or_default=False, with_type=False, has=False
|
||||
),
|
||||
self._get_dict_get_common_expression(
|
||||
dict_name, field, row, or_default=False, with_type=True, has=False
|
||||
),
|
||||
]
|
||||
|
||||
def get_get_or_default_expressions(self, dict_name, field, row):
|
||||
if not self.layout.is_ranged:
|
||||
return [
|
||||
self._get_dict_get_common_expression(dict_name, field, row, or_default=True, with_type=False,
|
||||
has=False),
|
||||
self._get_dict_get_common_expression(dict_name, field, row, or_default=True, with_type=True, has=False),
|
||||
self._get_dict_get_common_expression(
|
||||
dict_name, field, row, or_default=True, with_type=False, has=False
|
||||
),
|
||||
self._get_dict_get_common_expression(
|
||||
dict_name, field, row, or_default=True, with_type=True, has=False
|
||||
),
|
||||
]
|
||||
return []
|
||||
|
||||
def get_has_expressions(self, dict_name, field, row):
|
||||
if not self.layout.is_ranged:
|
||||
return [self._get_dict_get_common_expression(dict_name, field, row, or_default=False, with_type=False,
|
||||
has=True)]
|
||||
return [
|
||||
self._get_dict_get_common_expression(
|
||||
dict_name, field, row, or_default=False, with_type=False, has=True
|
||||
)
|
||||
]
|
||||
return []
|
||||
|
||||
def get_hierarchical_expressions(self, dict_name, row):
|
||||
if self.layout.is_simple:
|
||||
key_expr = 'toUInt64({})'.format(row.data[self.keys[0].name])
|
||||
key_expr = "toUInt64({})".format(row.data[self.keys[0].name])
|
||||
return [
|
||||
"dictGetHierarchy('{dict_name}', {key})".format(
|
||||
dict_name=dict_name,
|
||||
@ -288,21 +327,31 @@ class DictionaryStructure(object):
|
||||
|
||||
def get_is_in_expressions(self, dict_name, row, parent_row):
|
||||
if self.layout.is_simple:
|
||||
child_key_expr = 'toUInt64({})'.format(row.data[self.keys[0].name])
|
||||
parent_key_expr = 'toUInt64({})'.format(parent_row.data[self.keys[0].name])
|
||||
child_key_expr = "toUInt64({})".format(row.data[self.keys[0].name])
|
||||
parent_key_expr = "toUInt64({})".format(parent_row.data[self.keys[0].name])
|
||||
return [
|
||||
"dictIsIn('{dict_name}', {child_key}, {parent_key})".format(
|
||||
dict_name=dict_name,
|
||||
child_key=child_key_expr,
|
||||
parent_key=parent_key_expr, )
|
||||
parent_key=parent_key_expr,
|
||||
)
|
||||
]
|
||||
|
||||
return []
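
# Illustrative sketch, not part of this commit: roughly the SQL expression strings that
# _get_dict_get_common_expression() assembles for a simple (non-complex) layout. The
# final dictGet template sits outside the visible hunks, so this is an approximation;
# the dictionary name, field name and key value are hypothetical.
def dict_get_expr(dict_name, field_name, field_type, key, or_default=False, default=None):
    or_default_expr = "OrDefault" if or_default else ""
    default_expr = ", to{}({})".format(field_type, default) if or_default else ""
    return "dictGet{type}{or_default}('{d}', '{f}', toUInt64({key}){default})".format(
        type=field_type,
        or_default=or_default_expr,
        d=dict_name,
        f=field_name,
        key=key,
        default=default_expr,
    )

print(dict_get_expr("somedict", "UInt8_", "UInt8", 5))
print(dict_get_expr("somedict", "UInt8_", "UInt8", 5, or_default=True, default=55))
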
|
||||
|
||||
|
||||
class Dictionary(object):
|
||||
def __init__(self, name, structure, source, config_path,
|
||||
table_name, fields, min_lifetime=3, max_lifetime=5):
|
||||
def __init__(
|
||||
self,
|
||||
name,
|
||||
structure,
|
||||
source,
|
||||
config_path,
|
||||
table_name,
|
||||
fields,
|
||||
min_lifetime=3,
|
||||
max_lifetime=5,
|
||||
):
|
||||
self.name = name
|
||||
self.structure = copy.deepcopy(structure)
|
||||
self.source = copy.deepcopy(source)
|
||||
@ -313,9 +362,10 @@ class Dictionary(object):
|
||||
self.max_lifetime = max_lifetime
|
||||
|
||||
def generate_config(self):
|
||||
with open(self.config_path, 'w') as result:
|
||||
if 'direct' not in self.structure.layout.get_str():
|
||||
result.write('''
|
||||
with open(self.config_path, "w") as result:
|
||||
if "direct" not in self.structure.layout.get_str():
|
||||
result.write(
|
||||
"""
|
||||
<clickhouse>
|
||||
<dictionary>
|
||||
<lifetime>
|
||||
@ -329,15 +379,17 @@ class Dictionary(object):
|
||||
</source>
|
||||
</dictionary>
|
||||
</clickhouse>
|
||||
'''.format(
|
||||
min_lifetime=self.min_lifetime,
|
||||
max_lifetime=self.max_lifetime,
|
||||
name=self.name,
|
||||
structure=self.structure.get_structure_str(),
|
||||
source=self.source.get_source_str(self.table_name),
|
||||
))
|
||||
""".format(
|
||||
min_lifetime=self.min_lifetime,
|
||||
max_lifetime=self.max_lifetime,
|
||||
name=self.name,
|
||||
structure=self.structure.get_structure_str(),
|
||||
source=self.source.get_source_str(self.table_name),
|
||||
)
|
||||
)
|
||||
else:
|
||||
result.write('''
|
||||
result.write(
|
||||
"""
|
||||
<clickhouse>
|
||||
<dictionary>
|
||||
<name>{name}</name>
|
||||
@ -347,38 +399,59 @@ class Dictionary(object):
|
||||
</source>
|
||||
</dictionary>
|
||||
</clickhouse>
|
||||
'''.format(
|
||||
min_lifetime=self.min_lifetime,
|
||||
max_lifetime=self.max_lifetime,
|
||||
name=self.name,
|
||||
structure=self.structure.get_structure_str(),
|
||||
source=self.source.get_source_str(self.table_name),
|
||||
))
|
||||
""".format(
|
||||
min_lifetime=self.min_lifetime,
|
||||
max_lifetime=self.max_lifetime,
|
||||
name=self.name,
|
||||
structure=self.structure.get_structure_str(),
|
||||
source=self.source.get_source_str(self.table_name),
|
||||
)
|
||||
)
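
# Illustrative sketch, not part of this commit: the overall shape of the dictionary
# config XML that generate_config() writes above. Parts of the template are outside the
# visible hunks, so the exact element order here is an assumption; the structure and
# source fragments are placeholders.
def render_dictionary_config(name, structure_xml, source_xml, min_lifetime=3, max_lifetime=5):
    return """
<clickhouse>
    <dictionary>
        <lifetime>
            <min>{min_lifetime}</min>
            <max>{max_lifetime}</max>
        </lifetime>
        <name>{name}</name>
        {structure}
        <source>
            {source}
        </source>
    </dictionary>
</clickhouse>""".format(
        min_lifetime=min_lifetime,
        max_lifetime=max_lifetime,
        name=name,
        structure=structure_xml,
        source=source_xml,
    )

print(render_dictionary_config("cache_mysql", "<structure>...</structure>", "<mysql>...</mysql>"))
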
|
||||
|
||||
def prepare_source(self, cluster):
|
||||
self.source.prepare(self.structure, self.table_name, cluster)
|
||||
|
||||
def load_data(self, data):
|
||||
if not self.source.prepared:
|
||||
raise Exception("Cannot load data for dictionary {}, source is not prepared".format(self.name))
|
||||
raise Exception(
|
||||
"Cannot load data for dictionary {}, source is not prepared".format(
|
||||
self.name
|
||||
)
|
||||
)
|
||||
|
||||
self.source.load_data(data, self.table_name)
|
||||
|
||||
def get_select_get_queries(self, field, row):
|
||||
return ['select {}'.format(expr) for expr in self.structure.get_get_expressions(self.name, field, row)]
|
||||
return [
|
||||
"select {}".format(expr)
|
||||
for expr in self.structure.get_get_expressions(self.name, field, row)
|
||||
]
|
||||
|
||||
def get_select_get_or_default_queries(self, field, row):
|
||||
return ['select {}'.format(expr) for expr in
|
||||
self.structure.get_get_or_default_expressions(self.name, field, row)]
|
||||
return [
|
||||
"select {}".format(expr)
|
||||
for expr in self.structure.get_get_or_default_expressions(
|
||||
self.name, field, row
|
||||
)
|
||||
]
|
||||
|
||||
def get_select_has_queries(self, field, row):
|
||||
return ['select {}'.format(expr) for expr in self.structure.get_has_expressions(self.name, field, row)]
|
||||
return [
|
||||
"select {}".format(expr)
|
||||
for expr in self.structure.get_has_expressions(self.name, field, row)
|
||||
]
|
||||
|
||||
def get_hierarchical_queries(self, row):
|
||||
return ['select {}'.format(expr) for expr in self.structure.get_hierarchical_expressions(self.name, row)]
|
||||
return [
|
||||
"select {}".format(expr)
|
||||
for expr in self.structure.get_hierarchical_expressions(self.name, row)
|
||||
]
|
||||
|
||||
def get_is_in_queries(self, row, parent_row):
|
||||
return ['select {}'.format(expr) for expr in self.structure.get_is_in_expressions(self.name, row, parent_row)]
|
||||
return [
|
||||
"select {}".format(expr)
|
||||
for expr in self.structure.get_is_in_expressions(self.name, row, parent_row)
|
||||
]
|
||||
|
||||
def is_complex(self):
|
||||
return self.structure.layout.is_complex
|
||||
|
@ -11,9 +11,18 @@ import pymysql.cursors
|
||||
import redis
|
||||
import logging
|
||||
|
||||
|
||||
class ExternalSource(object):
|
||||
def __init__(self, name, internal_hostname, internal_port,
|
||||
docker_hostname, docker_port, user, password):
|
||||
def __init__(
|
||||
self,
|
||||
name,
|
||||
internal_hostname,
|
||||
internal_port,
|
||||
docker_hostname,
|
||||
docker_port,
|
||||
user,
|
||||
password,
|
||||
):
|
||||
self.name = name
|
||||
self.internal_hostname = internal_hostname
|
||||
self.internal_port = int(internal_port)
|
||||
@ -23,17 +32,26 @@ class ExternalSource(object):
|
||||
self.password = password
|
||||
|
||||
def get_source_str(self, table_name):
|
||||
raise NotImplementedError("Method {} is not implemented for {}".format(
|
||||
"get_source_config_part", self.__class__.__name__))
|
||||
raise NotImplementedError(
|
||||
"Method {} is not implemented for {}".format(
|
||||
"get_source_config_part", self.__class__.__name__
|
||||
)
|
||||
)
|
||||
|
||||
def prepare(self, structure, table_name, cluster):
|
||||
raise NotImplementedError("Method {} is not implemented for {}".format(
|
||||
"prepare_remote_source", self.__class__.__name__))
|
||||
raise NotImplementedError(
|
||||
"Method {} is not implemented for {}".format(
|
||||
"prepare_remote_source", self.__class__.__name__
|
||||
)
|
||||
)
|
||||
|
||||
# data is a bunch of Row objects
|
||||
def load_data(self, data):
|
||||
raise NotImplementedError("Method {} is not implemented for {}".format(
|
||||
"prepare_remote_source", self.__class__.__name__))
|
||||
raise NotImplementedError(
|
||||
"Method {} is not implemented for {}".format(
|
||||
"prepare_remote_source", self.__class__.__name__
|
||||
)
|
||||
)
|
||||
|
||||
def compatible_with_layout(self, layout):
|
||||
return True
|
||||
@ -41,29 +59,32 @@ class ExternalSource(object):
|
||||
|
||||
class SourceMySQL(ExternalSource):
|
||||
TYPE_MAPPING = {
|
||||
'UInt8': 'tinyint unsigned',
|
||||
'UInt16': 'smallint unsigned',
|
||||
'UInt32': 'int unsigned',
|
||||
'UInt64': 'bigint unsigned',
|
||||
'Int8': 'tinyint',
|
||||
'Int16': 'smallint',
|
||||
'Int32': 'int',
|
||||
'Int64': 'bigint',
|
||||
'UUID': 'varchar(36)',
|
||||
'Date': 'date',
|
||||
'DateTime': 'datetime',
|
||||
'String': 'text',
|
||||
'Float32': 'float',
|
||||
'Float64': 'double'
|
||||
"UInt8": "tinyint unsigned",
|
||||
"UInt16": "smallint unsigned",
|
||||
"UInt32": "int unsigned",
|
||||
"UInt64": "bigint unsigned",
|
||||
"Int8": "tinyint",
|
||||
"Int16": "smallint",
|
||||
"Int32": "int",
|
||||
"Int64": "bigint",
|
||||
"UUID": "varchar(36)",
|
||||
"Date": "date",
|
||||
"DateTime": "datetime",
|
||||
"String": "text",
|
||||
"Float32": "float",
|
||||
"Float64": "double",
|
||||
}
|
||||
|
||||
def create_mysql_conn(self):
|
||||
logging.debug(f"pymysql connect {self.user}, {self.password}, {self.internal_hostname}, {self.internal_port}")
|
||||
logging.debug(
|
||||
f"pymysql connect {self.user}, {self.password}, {self.internal_hostname}, {self.internal_port}"
|
||||
)
|
||||
self.connection = pymysql.connect(
|
||||
user=self.user,
|
||||
password=self.password,
|
||||
host=self.internal_hostname,
|
||||
port=self.internal_port)
|
||||
port=self.internal_port,
|
||||
)
|
||||
|
||||
def execute_mysql_query(self, query):
|
||||
with warnings.catch_warnings():
|
||||
@ -73,7 +94,7 @@ class SourceMySQL(ExternalSource):
|
||||
self.connection.commit()
|
||||
|
||||
def get_source_str(self, table_name):
|
||||
return '''
|
||||
return """
|
||||
<mysql>
|
||||
<replica>
|
||||
<priority>1</priority>
|
||||
@ -89,7 +110,7 @@ class SourceMySQL(ExternalSource):
|
||||
<password>{password}</password>
|
||||
<db>test</db>
|
||||
<table>{tbl}</table>
|
||||
</mysql>'''.format(
|
||||
</mysql>""".format(
|
||||
hostname=self.docker_hostname,
|
||||
port=self.docker_port,
|
||||
user=self.user,
|
||||
@ -101,14 +122,20 @@ class SourceMySQL(ExternalSource):
|
||||
if self.internal_hostname is None:
|
||||
self.internal_hostname = cluster.mysql_ip
|
||||
self.create_mysql_conn()
|
||||
self.execute_mysql_query("create database if not exists test default character set 'utf8'")
|
||||
self.execute_mysql_query(
|
||||
"create database if not exists test default character set 'utf8'"
|
||||
)
|
||||
self.execute_mysql_query("drop table if exists test.{}".format(table_name))
|
||||
fields_strs = []
|
||||
for field in structure.keys + structure.ordinary_fields + structure.range_fields:
|
||||
fields_strs.append(field.name + ' ' + self.TYPE_MAPPING[field.field_type])
|
||||
create_query = '''create table test.{table_name} (
|
||||
for field in (
|
||||
structure.keys + structure.ordinary_fields + structure.range_fields
|
||||
):
|
||||
fields_strs.append(field.name + " " + self.TYPE_MAPPING[field.field_type])
|
||||
create_query = """create table test.{table_name} (
|
||||
{fields_str});
|
||||
'''.format(table_name=table_name, fields_str=','.join(fields_strs))
|
||||
""".format(
|
||||
table_name=table_name, fields_str=",".join(fields_strs)
|
||||
)
|
||||
self.execute_mysql_query(create_query)
|
||||
self.ordered_names = structure.get_ordered_names()
|
||||
self.prepared = True
|
||||
@ -126,18 +153,16 @@ class SourceMySQL(ExternalSource):
|
||||
else:
|
||||
data = str(data)
|
||||
sorted_row.append(data)
|
||||
values_strs.append('(' + ','.join(sorted_row) + ')')
|
||||
query = 'insert into test.{} ({}) values {}'.format(
|
||||
table_name,
|
||||
','.join(self.ordered_names),
|
||||
','.join(values_strs))
|
||||
values_strs.append("(" + ",".join(sorted_row) + ")")
|
||||
query = "insert into test.{} ({}) values {}".format(
|
||||
table_name, ",".join(self.ordered_names), ",".join(values_strs)
|
||||
)
|
||||
self.execute_mysql_query(query)
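
# Illustrative sketch, not part of this commit: the INSERT statement string that
# SourceMySQL.load_data() assembles above, with made-up column names and values.
ordered_names = ["id", "UInt8_", "String_"]
rows = [[1, 22, "'hello'"], [2, 33, "'world'"]]

values_strs = []
for row in rows:
    values_strs.append("(" + ",".join(str(v) for v in row) + ")")

query = "insert into test.{} ({}) values {}".format(
    "simple_table", ",".join(ordered_names), ",".join(values_strs)
)
print(query)  # insert into test.simple_table (id,UInt8_,String_) values (1,22,'hello'),(2,33,'world')
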
|
||||
|
||||
|
||||
class SourceMongo(ExternalSource):
|
||||
|
||||
def get_source_str(self, table_name):
|
||||
return '''
|
||||
return """
|
||||
<mongodb>
|
||||
<host>{host}</host>
|
||||
<port>{port}</port>
|
||||
@ -146,7 +171,7 @@ class SourceMongo(ExternalSource):
|
||||
<db>test</db>
|
||||
<collection>{tbl}</collection>
|
||||
</mongodb>
|
||||
'''.format(
|
||||
""".format(
|
||||
host=self.docker_hostname,
|
||||
port=self.docker_port,
|
||||
user=self.user,
|
||||
@ -155,22 +180,29 @@ class SourceMongo(ExternalSource):
|
||||
)
|
||||
|
||||
def prepare(self, structure, table_name, cluster):
|
||||
connection_str = 'mongodb://{user}:{password}@{host}:{port}'.format(
|
||||
host=self.internal_hostname, port=self.internal_port,
|
||||
user=self.user, password=self.password)
|
||||
connection_str = "mongodb://{user}:{password}@{host}:{port}".format(
|
||||
host=self.internal_hostname,
|
||||
port=self.internal_port,
|
||||
user=self.user,
|
||||
password=self.password,
|
||||
)
|
||||
self.connection = pymongo.MongoClient(connection_str)
|
||||
self.converters = {}
|
||||
for field in structure.get_all_fields():
|
||||
if field.field_type == "Date":
|
||||
self.converters[field.name] = lambda x: datetime.datetime.strptime(x, "%Y-%m-%d")
|
||||
self.converters[field.name] = lambda x: datetime.datetime.strptime(
|
||||
x, "%Y-%m-%d"
|
||||
)
|
||||
elif field.field_type == "DateTime":
|
||||
|
||||
def converter(x):
|
||||
return datetime.datetime.strptime(x, '%Y-%m-%d %H:%M:%S')
|
||||
return datetime.datetime.strptime(x, "%Y-%m-%d %H:%M:%S")
|
||||
|
||||
self.converters[field.name] = converter
|
||||
else:
|
||||
self.converters[field.name] = lambda x: x
|
||||
|
||||
self.db = self.connection['test']
|
||||
self.db = self.connection["test"]
|
||||
self.db.add_user(self.user, self.password)
|
||||
self.prepared = True
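
# Illustrative sketch, not part of this commit: the per-field converter table that
# SourceMongo.prepare() builds above. Date/DateTime strings are parsed into datetime
# objects, everything else passes through; field names here are examples.
import datetime

def build_converters(field_types):
    converters = {}
    for name, field_type in field_types.items():
        if field_type == "Date":
            converters[name] = lambda x: datetime.datetime.strptime(x, "%Y-%m-%d")
        elif field_type == "DateTime":
            converters[name] = lambda x: datetime.datetime.strptime(x, "%Y-%m-%d %H:%M:%S")
        else:
            converters[name] = lambda x: x
    return converters

converters = build_converters({"Date_": "Date", "UInt8_": "UInt8"})
print(converters["Date_"]("2022-03-22"), converters["UInt8_"](42))
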
|
||||
|
||||
@ -191,15 +223,15 @@ class SourceMongoURI(SourceMongo):
|
||||
def compatible_with_layout(self, layout):
|
||||
# It is enough to test one layout for this dictionary, since we're
|
||||
# only testing that the connection with URI works.
|
||||
return layout.name == 'flat'
|
||||
return layout.name == "flat"
|
||||
|
||||
def get_source_str(self, table_name):
|
||||
return '''
|
||||
return """
|
||||
<mongodb>
|
||||
<uri>mongodb://{user}:{password}@{host}:{port}/test</uri>
|
||||
<collection>{tbl}</collection>
|
||||
</mongodb>
|
||||
'''.format(
|
||||
""".format(
|
||||
host=self.docker_hostname,
|
||||
port=self.docker_port,
|
||||
user=self.user,
|
||||
@ -209,9 +241,8 @@ class SourceMongoURI(SourceMongo):
|
||||
|
||||
|
||||
class SourceClickHouse(ExternalSource):
|
||||
|
||||
def get_source_str(self, table_name):
|
||||
return '''
|
||||
return """
|
||||
<clickhouse>
|
||||
<host>{host}</host>
|
||||
<port>{port}</port>
|
||||
@ -220,7 +251,7 @@ class SourceClickHouse(ExternalSource):
|
||||
<db>test</db>
|
||||
<table>{tbl}</table>
|
||||
</clickhouse>
|
||||
'''.format(
|
||||
""".format(
|
||||
host=self.docker_hostname,
|
||||
port=self.docker_port,
|
||||
user=self.user,
|
||||
@ -232,11 +263,15 @@ class SourceClickHouse(ExternalSource):
|
||||
self.node = cluster.instances[self.docker_hostname]
|
||||
self.node.query("CREATE DATABASE IF NOT EXISTS test")
|
||||
fields_strs = []
|
||||
for field in structure.keys + structure.ordinary_fields + structure.range_fields:
|
||||
fields_strs.append(field.name + ' ' + field.field_type)
|
||||
create_query = '''CREATE TABLE test.{table_name} (
|
||||
for field in (
|
||||
structure.keys + structure.ordinary_fields + structure.range_fields
|
||||
):
|
||||
fields_strs.append(field.name + " " + field.field_type)
|
||||
create_query = """CREATE TABLE test.{table_name} (
|
||||
{fields_str}) ENGINE MergeTree ORDER BY tuple();
|
||||
'''.format(table_name=table_name, fields_str=','.join(fields_strs))
|
||||
""".format(
|
||||
table_name=table_name, fields_str=",".join(fields_strs)
|
||||
)
|
||||
self.node.query(create_query)
|
||||
self.ordered_names = structure.get_ordered_names()
|
||||
self.prepared = True
|
||||
@ -254,31 +289,31 @@ class SourceClickHouse(ExternalSource):
|
||||
else:
|
||||
row_data = str(row_data)
|
||||
sorted_row.append(row_data)
|
||||
values_strs.append('(' + ','.join(sorted_row) + ')')
|
||||
query = 'INSERT INTO test.{} ({}) values {}'.format(
|
||||
table_name,
|
||||
','.join(self.ordered_names),
|
||||
','.join(values_strs))
|
||||
values_strs.append("(" + ",".join(sorted_row) + ")")
|
||||
query = "INSERT INTO test.{} ({}) values {}".format(
|
||||
table_name, ",".join(self.ordered_names), ",".join(values_strs)
|
||||
)
|
||||
self.node.query(query)
|
||||
|
||||
|
||||
class SourceFile(ExternalSource):
|
||||
|
||||
def get_source_str(self, table_name):
|
||||
table_path = "/" + table_name + ".tsv"
|
||||
return '''
|
||||
return """
|
||||
<file>
|
||||
<path>{path}</path>
|
||||
<format>TabSeparated</format>
|
||||
</file>
|
||||
'''.format(
|
||||
""".format(
|
||||
path=table_path,
|
||||
)
|
||||
|
||||
def prepare(self, structure, table_name, cluster):
|
||||
self.node = cluster.instances[self.docker_hostname]
|
||||
path = "/" + table_name + ".tsv"
|
||||
self.node.exec_in_container(["bash", "-c", "touch {}".format(path)], user="root")
|
||||
self.node.exec_in_container(
|
||||
["bash", "-c", "touch {}".format(path)], user="root"
|
||||
)
|
||||
self.ordered_names = structure.get_ordered_names()
|
||||
self.prepared = True
|
||||
|
||||
@ -291,35 +326,45 @@ class SourceFile(ExternalSource):
|
||||
for name in self.ordered_names:
|
||||
sorted_row.append(str(row.data[name]))
|
||||
|
||||
str_data = '\t'.join(sorted_row)
|
||||
self.node.exec_in_container(["bash", "-c", "echo \"{row}\" >> {fname}".format(row=str_data, fname=path)],
|
||||
user="root")
|
||||
str_data = "\t".join(sorted_row)
|
||||
self.node.exec_in_container(
|
||||
[
|
||||
"bash",
|
||||
"-c",
|
||||
'echo "{row}" >> {fname}'.format(row=str_data, fname=path),
|
||||
],
|
||||
user="root",
|
||||
)
|
||||
|
||||
def compatible_with_layout(self, layout):
|
||||
return 'cache' not in layout.name and 'direct' not in layout.name
|
||||
return "cache" not in layout.name and "direct" not in layout.name
|
||||
|
||||
|
||||
class _SourceExecutableBase(ExternalSource):
|
||||
|
||||
def _get_cmd(self, path):
|
||||
raise NotImplementedError("Method {} is not implemented for {}".format(
|
||||
"_get_cmd", self.__class__.__name__))
|
||||
raise NotImplementedError(
|
||||
"Method {} is not implemented for {}".format(
|
||||
"_get_cmd", self.__class__.__name__
|
||||
)
|
||||
)
|
||||
|
||||
def get_source_str(self, table_name):
|
||||
table_path = "/" + table_name + ".tsv"
|
||||
return '''
|
||||
return """
|
||||
<executable>
|
||||
<command>{cmd}</command>
|
||||
<format>TabSeparated</format>
|
||||
</executable>
|
||||
'''.format(
|
||||
""".format(
|
||||
cmd=self._get_cmd(table_path),
|
||||
)
|
||||
|
||||
def prepare(self, structure, table_name, cluster):
|
||||
self.node = cluster.instances[self.docker_hostname]
|
||||
path = "/" + table_name + ".tsv"
|
||||
self.node.exec_in_container(["bash", "-c", "touch {}".format(path)], user="root")
|
||||
self.node.exec_in_container(
|
||||
["bash", "-c", "touch {}".format(path)], user="root"
|
||||
)
|
||||
self.ordered_names = structure.get_ordered_names()
|
||||
self.prepared = True
|
||||
|
||||
@ -332,27 +377,31 @@ class _SourceExecutableBase(ExternalSource):
|
||||
for name in self.ordered_names:
|
||||
sorted_row.append(str(row.data[name]))
|
||||
|
||||
str_data = '\t'.join(sorted_row)
|
||||
self.node.exec_in_container(["bash", "-c", "echo \"{row}\" >> {fname}".format(row=str_data, fname=path)],
|
||||
user='root')
|
||||
str_data = "\t".join(sorted_row)
|
||||
self.node.exec_in_container(
|
||||
[
|
||||
"bash",
|
||||
"-c",
|
||||
'echo "{row}" >> {fname}'.format(row=str_data, fname=path),
|
||||
],
|
||||
user="root",
|
||||
)
|
||||
|
||||
|
||||
class SourceExecutableHashed(_SourceExecutableBase):
|
||||
|
||||
def _get_cmd(self, path):
|
||||
return "cat {}".format(path)
|
||||
|
||||
def compatible_with_layout(self, layout):
|
||||
return 'hashed' in layout.name
|
||||
return "hashed" in layout.name
|
||||
|
||||
|
||||
class SourceExecutableCache(_SourceExecutableBase):
|
||||
|
||||
def _get_cmd(self, path):
|
||||
return "cat - >/dev/null;cat {}".format(path)
|
||||
|
||||
def compatible_with_layout(self, layout):
|
||||
return 'cache' in layout.name
|
||||
return "cache" in layout.name
|
||||
|
||||
|
||||
class SourceHTTPBase(ExternalSource):
|
||||
@ -360,10 +409,11 @@ class SourceHTTPBase(ExternalSource):
|
||||
|
||||
def get_source_str(self, table_name):
|
||||
self.http_port = SourceHTTPBase.PORT_COUNTER
|
||||
url = "{schema}://{host}:{port}/".format(schema=self._get_schema(), host=self.docker_hostname,
|
||||
port=self.http_port)
|
||||
url = "{schema}://{host}:{port}/".format(
|
||||
schema=self._get_schema(), host=self.docker_hostname, port=self.http_port
|
||||
)
|
||||
SourceHTTPBase.PORT_COUNTER += 1
|
||||
return '''
|
||||
return """
|
||||
<http>
|
||||
<url>{url}</url>
|
||||
<format>TabSeparated</format>
|
||||
@ -378,22 +428,37 @@ class SourceHTTPBase(ExternalSource):
|
||||
</header>
|
||||
</headers>
|
||||
</http>
|
||||
'''.format(url=url)
|
||||
""".format(
|
||||
url=url
|
||||
)
|
||||
|
||||
def prepare(self, structure, table_name, cluster):
|
||||
self.node = cluster.instances[self.docker_hostname]
|
||||
path = "/" + table_name + ".tsv"
|
||||
self.node.exec_in_container(["bash", "-c", "touch {}".format(path)], user='root')
|
||||
self.node.exec_in_container(
|
||||
["bash", "-c", "touch {}".format(path)], user="root"
|
||||
)
|
||||
|
||||
script_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
self.node.copy_file_to_container(os.path.join(script_dir, './http_server.py'), '/http_server.py')
|
||||
self.node.copy_file_to_container(os.path.join(script_dir, './fake_cert.pem'), '/fake_cert.pem')
|
||||
self.node.exec_in_container([
|
||||
"bash",
|
||||
"-c",
|
||||
"python3 /http_server.py --data-path={tbl} --schema={schema} --host={host} --port={port} --cert-path=/fake_cert.pem".format(
|
||||
tbl=path, schema=self._get_schema(), host=self.docker_hostname, port=self.http_port)
|
||||
], detach=True)
|
||||
self.node.copy_file_to_container(
|
||||
os.path.join(script_dir, "./http_server.py"), "/http_server.py"
|
||||
)
|
||||
self.node.copy_file_to_container(
|
||||
os.path.join(script_dir, "./fake_cert.pem"), "/fake_cert.pem"
|
||||
)
|
||||
self.node.exec_in_container(
|
||||
[
|
||||
"bash",
|
||||
"-c",
|
||||
"python3 /http_server.py --data-path={tbl} --schema={schema} --host={host} --port={port} --cert-path=/fake_cert.pem".format(
|
||||
tbl=path,
|
||||
schema=self._get_schema(),
|
||||
host=self.docker_hostname,
|
||||
port=self.http_port,
|
||||
),
|
||||
],
|
||||
detach=True,
|
||||
)
|
||||
self.ordered_names = structure.get_ordered_names()
|
||||
self.prepared = True
|
||||
|
||||
@ -406,9 +471,15 @@ class SourceHTTPBase(ExternalSource):
|
||||
for name in self.ordered_names:
|
||||
sorted_row.append(str(row.data[name]))
|
||||
|
||||
str_data = '\t'.join(sorted_row)
|
||||
self.node.exec_in_container(["bash", "-c", "echo \"{row}\" >> {fname}".format(row=str_data, fname=path)],
|
||||
user='root')
|
||||
str_data = "\t".join(sorted_row)
|
||||
self.node.exec_in_container(
|
||||
[
|
||||
"bash",
|
||||
"-c",
|
||||
'echo "{row}" >> {fname}'.format(row=str_data, fname=path),
|
||||
],
|
||||
user="root",
|
||||
)
|
||||
|
||||
|
||||
class SourceHTTP(SourceHTTPBase):
|
||||
@ -423,29 +494,46 @@ class SourceHTTPS(SourceHTTPBase):
|
||||
|
||||
class SourceCassandra(ExternalSource):
|
||||
TYPE_MAPPING = {
|
||||
'UInt8': 'tinyint',
|
||||
'UInt16': 'smallint',
|
||||
'UInt32': 'int',
|
||||
'UInt64': 'bigint',
|
||||
'Int8': 'tinyint',
|
||||
'Int16': 'smallint',
|
||||
'Int32': 'int',
|
||||
'Int64': 'bigint',
|
||||
'UUID': 'uuid',
|
||||
'Date': 'date',
|
||||
'DateTime': 'timestamp',
|
||||
'String': 'text',
|
||||
'Float32': 'float',
|
||||
'Float64': 'double'
|
||||
"UInt8": "tinyint",
|
||||
"UInt16": "smallint",
|
||||
"UInt32": "int",
|
||||
"UInt64": "bigint",
|
||||
"Int8": "tinyint",
|
||||
"Int16": "smallint",
|
||||
"Int32": "int",
|
||||
"Int64": "bigint",
|
||||
"UUID": "uuid",
|
||||
"Date": "date",
|
||||
"DateTime": "timestamp",
|
||||
"String": "text",
|
||||
"Float32": "float",
|
||||
"Float64": "double",
|
||||
}
|
||||
|
||||
def __init__(self, name, internal_hostname, internal_port, docker_hostname, docker_port, user, password):
|
||||
ExternalSource.__init__(self, name, internal_hostname, internal_port, docker_hostname, docker_port, user,
|
||||
password)
|
||||
def __init__(
|
||||
self,
|
||||
name,
|
||||
internal_hostname,
|
||||
internal_port,
|
||||
docker_hostname,
|
||||
docker_port,
|
||||
user,
|
||||
password,
|
||||
):
|
||||
ExternalSource.__init__(
|
||||
self,
|
||||
name,
|
||||
internal_hostname,
|
||||
internal_port,
|
||||
docker_hostname,
|
||||
docker_port,
|
||||
user,
|
||||
password,
|
||||
)
|
||||
self.structure = dict()
|
||||
|
||||
def get_source_str(self, table_name):
|
||||
return '''
|
||||
return """
|
||||
<cassandra>
|
||||
<host>{host}</host>
|
||||
<port>{port}</port>
|
||||
@ -454,7 +542,7 @@ class SourceCassandra(ExternalSource):
|
||||
<allow_filtering>1</allow_filtering>
|
||||
<where>"Int64_" < 1000000000000000000</where>
|
||||
</cassandra>
|
||||
'''.format(
|
||||
""".format(
|
||||
host=self.docker_hostname,
|
||||
port=self.docker_port,
|
||||
table=table_name,
|
||||
@ -464,49 +552,79 @@ class SourceCassandra(ExternalSource):
|
||||
if self.internal_hostname is None:
|
||||
self.internal_hostname = cluster.cassandra_ip
|
||||
|
||||
self.client = cassandra.cluster.Cluster([self.internal_hostname], port=self.internal_port)
|
||||
self.client = cassandra.cluster.Cluster(
|
||||
[self.internal_hostname], port=self.internal_port
|
||||
)
|
||||
self.session = self.client.connect()
|
||||
self.session.execute(
|
||||
"create keyspace if not exists test with replication = {'class': 'SimpleStrategy', 'replication_factor' : 1};")
|
||||
"create keyspace if not exists test with replication = {'class': 'SimpleStrategy', 'replication_factor' : 1};"
|
||||
)
|
||||
self.session.execute('drop table if exists test."{}"'.format(table_name))
|
||||
self.structure[table_name] = structure
|
||||
columns = ['"' + col.name + '" ' + self.TYPE_MAPPING[col.field_type] for col in structure.get_all_fields()]
|
||||
columns = [
|
||||
'"' + col.name + '" ' + self.TYPE_MAPPING[col.field_type]
|
||||
for col in structure.get_all_fields()
|
||||
]
|
||||
keys = ['"' + col.name + '"' for col in structure.keys]
|
||||
query = 'create table test."{name}" ({columns}, primary key ({pk}));'.format(
|
||||
name=table_name, columns=', '.join(columns), pk=', '.join(keys))
|
||||
name=table_name, columns=", ".join(columns), pk=", ".join(keys)
|
||||
)
|
||||
self.session.execute(query)
|
||||
self.prepared = True
|
||||
|
||||
def get_value_to_insert(self, value, type):
|
||||
if type == 'UUID':
|
||||
if type == "UUID":
|
||||
return uuid.UUID(value)
|
||||
elif type == 'DateTime':
|
||||
return datetime.datetime.strptime(value, '%Y-%m-%d %H:%M:%S')
|
||||
elif type == "DateTime":
|
||||
return datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
|
||||
return value
|
||||
|
||||
def load_data(self, data, table_name):
|
||||
names_and_types = [(field.name, field.field_type) for field in self.structure[table_name].get_all_fields()]
|
||||
names_and_types = [
|
||||
(field.name, field.field_type)
|
||||
for field in self.structure[table_name].get_all_fields()
|
||||
]
|
||||
columns = ['"' + col[0] + '"' for col in names_and_types]
|
||||
insert = 'insert into test."{table}" ({columns}) values ({args})'.format(
|
||||
table=table_name, columns=','.join(columns), args=','.join(['%s'] * len(columns)))
|
||||
table=table_name,
|
||||
columns=",".join(columns),
|
||||
args=",".join(["%s"] * len(columns)),
|
||||
)
|
||||
for row in data:
|
||||
values = [self.get_value_to_insert(row.get_value_by_name(col[0]), col[1]) for col in names_and_types]
|
||||
values = [
|
||||
self.get_value_to_insert(row.get_value_by_name(col[0]), col[1])
|
||||
for col in names_and_types
|
||||
]
|
||||
self.session.execute(insert, values)
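
# Illustrative sketch, not part of this commit: the parametrized CQL statement that
# SourceCassandra.load_data() builds above. Column names are double-quoted and one %s
# placeholder is emitted per column; the table and columns here are examples.
names_and_types = [("KeyField", "UInt64"), ("String_", "String")]
columns = ['"' + name + '"' for name, _ in names_and_types]
insert = 'insert into test."{table}" ({columns}) values ({args})'.format(
    table="complex_table",
    columns=",".join(columns),
    args=",".join(["%s"] * len(columns)),
)
print(insert)  # insert into test."complex_table" ("KeyField","String_") values (%s,%s)
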
|
||||
|
||||
|
||||
class SourceRedis(ExternalSource):
|
||||
def __init__(
|
||||
self, name, internal_hostname, internal_port, docker_hostname, docker_port, user, password, db_index,
|
||||
storage_type
|
||||
self,
|
||||
name,
|
||||
internal_hostname,
|
||||
internal_port,
|
||||
docker_hostname,
|
||||
docker_port,
|
||||
user,
|
||||
password,
|
||||
db_index,
|
||||
storage_type,
|
||||
):
|
||||
super(SourceRedis, self).__init__(
|
||||
name, internal_hostname, internal_port, docker_hostname, docker_port, user, password
|
||||
name,
|
||||
internal_hostname,
|
||||
internal_port,
|
||||
docker_hostname,
|
||||
docker_port,
|
||||
user,
|
||||
password,
|
||||
)
|
||||
self.storage_type = storage_type
|
||||
self.db_index = db_index
|
||||
|
||||
def get_source_str(self, table_name):
|
||||
return '''
|
||||
return """
|
||||
<redis>
|
||||
<host>{host}</host>
|
||||
<port>{port}</port>
|
||||
@ -514,7 +632,7 @@ class SourceRedis(ExternalSource):
|
||||
<db_index>{db_index}</db_index>
|
||||
<storage_type>{storage_type}</storage_type>
|
||||
</redis>
|
||||
'''.format(
|
||||
""".format(
|
||||
host=self.docker_hostname,
|
||||
port=self.docker_port,
|
||||
password=self.password,
|
||||
@ -523,8 +641,12 @@ class SourceRedis(ExternalSource):
|
||||
)
|
||||
|
||||
def prepare(self, structure, table_name, cluster):
|
||||
self.client = redis.StrictRedis(host=self.internal_hostname, port=self.internal_port, db=self.db_index,
|
||||
password=self.password or None)
|
||||
self.client = redis.StrictRedis(
|
||||
host=self.internal_hostname,
|
||||
port=self.internal_port,
|
||||
db=self.db_index,
|
||||
password=self.password or None,
|
||||
)
|
||||
self.prepared = True
|
||||
self.ordered_names = structure.get_ordered_names()
|
||||
|
||||
@ -540,33 +662,52 @@ class SourceRedis(ExternalSource):
|
||||
self.client.hset(*values)
|
||||
|
||||
def compatible_with_layout(self, layout):
|
||||
return layout.is_simple and self.storage_type == "simple" or layout.is_complex and self.storage_type == "hash_map"
|
||||
return (
|
||||
layout.is_simple
|
||||
and self.storage_type == "simple"
|
||||
or layout.is_complex
|
||||
and self.storage_type == "hash_map"
|
||||
)
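
# Illustrative sketch, not part of this commit: the reformatted expression above relies
# on "and" binding tighter than "or"; this spells out the equivalent parenthesised form
# with stand-in values.
def compatible(layout_is_simple, layout_is_complex, storage_type):
    return (layout_is_simple and storage_type == "simple") or (
        layout_is_complex and storage_type == "hash_map"
    )

print(compatible(True, False, "simple"))    # True
print(compatible(False, True, "hash_map"))  # True
print(compatible(True, False, "hash_map"))  # False
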
|
||||
|
||||
|
||||
class SourceAerospike(ExternalSource):
|
||||
def __init__(self, name, internal_hostname, internal_port,
|
||||
docker_hostname, docker_port, user, password):
|
||||
ExternalSource.__init__(self, name, internal_hostname, internal_port,
|
||||
docker_hostname, docker_port, user, password)
|
||||
def __init__(
|
||||
self,
|
||||
name,
|
||||
internal_hostname,
|
||||
internal_port,
|
||||
docker_hostname,
|
||||
docker_port,
|
||||
user,
|
||||
password,
|
||||
):
|
||||
ExternalSource.__init__(
|
||||
self,
|
||||
name,
|
||||
internal_hostname,
|
||||
internal_port,
|
||||
docker_hostname,
|
||||
docker_port,
|
||||
user,
|
||||
password,
|
||||
)
|
||||
self.namespace = "test"
|
||||
self.set = "test_set"
|
||||
|
||||
def get_source_str(self, table_name):
|
||||
print("AEROSPIKE get source str")
|
||||
return '''
|
||||
return """
|
||||
<aerospike>
|
||||
<host>{host}</host>
|
||||
<port>{port}</port>
|
||||
</aerospike>
|
||||
'''.format(
|
||||
""".format(
|
||||
host=self.docker_hostname,
|
||||
port=self.docker_port,
|
||||
)
|
||||
|
||||
def prepare(self, structure, table_name, cluster):
|
||||
config = {
|
||||
'hosts': [(self.internal_hostname, self.internal_port)]
|
||||
}
|
||||
config = {"hosts": [(self.internal_hostname, self.internal_port)]}
|
||||
self.client = aerospike.client(config).connect()
|
||||
self.prepared = True
|
||||
print("PREPARED AEROSPIKE")
|
||||
@ -601,10 +742,14 @@ class SourceAerospike(ExternalSource):
|
||||
for value in values:
|
||||
key = (self.namespace, self.set, value[0])
|
||||
print(key)
|
||||
self.client.put(key, {"bin_value": value[1]}, policy={"key": aerospike.POLICY_KEY_SEND})
|
||||
self.client.put(
|
||||
key,
|
||||
{"bin_value": value[1]},
|
||||
policy={"key": aerospike.POLICY_KEY_SEND},
|
||||
)
|
||||
assert self.client.exists(key)
|
||||
else:
|
||||
assert ("VALUES SIZE != 2")
|
||||
assert "VALUES SIZE != 2"
|
||||
|
||||
# print(values)
|
||||
|
||||
|
@ -10,27 +10,44 @@ import socket
|
||||
import tempfile
|
||||
import logging
|
||||
import os
|
||||
|
||||
|
||||
class mk_krb_conf(object):
|
||||
def __init__(self, krb_conf, kdc_ip):
|
||||
self.krb_conf = krb_conf
|
||||
self.kdc_ip = kdc_ip
|
||||
self.amended_krb_conf = None
|
||||
|
||||
def __enter__(self):
|
||||
with open(self.krb_conf) as f:
|
||||
content = f.read()
|
||||
amended_content = content.replace('hdfskerberos', self.kdc_ip)
|
||||
amended_content = content.replace("hdfskerberos", self.kdc_ip)
|
||||
self.amended_krb_conf = tempfile.NamedTemporaryFile(delete=False, mode="w+")
|
||||
self.amended_krb_conf.write(amended_content)
|
||||
self.amended_krb_conf.close()
|
||||
return self.amended_krb_conf.name
|
||||
|
||||
def __exit__(self, type, value, traceback):
|
||||
if self.amended_krb_conf is not None:
|
||||
self.amended_krb_conf.close()
|
||||
|
||||
|
||||
class HDFSApi(object):
|
||||
def __init__(self, user, host, proxy_port, data_port, timeout=100, kerberized=False, principal=None,
|
||||
keytab=None, krb_conf=None,
|
||||
protocol = "http", hdfs_ip = None, kdc_ip = None):
|
||||
def __init__(
|
||||
self,
|
||||
user,
|
||||
host,
|
||||
proxy_port,
|
||||
data_port,
|
||||
timeout=100,
|
||||
kerberized=False,
|
||||
principal=None,
|
||||
keytab=None,
|
||||
krb_conf=None,
|
||||
protocol="http",
|
||||
hdfs_ip=None,
|
||||
kdc_ip=None,
|
||||
):
|
||||
self.host = host
|
||||
self.protocol = protocol
|
||||
self.proxy_port = proxy_port
|
||||
@ -55,7 +72,11 @@ class HDFSApi(object):
|
||||
|
||||
if kerberized:
|
||||
self._run_kinit()
|
||||
self.kerberos_auth = reqkerb.HTTPKerberosAuth(mutual_authentication=reqkerb.DISABLED, hostname_override=self.host, principal=self.principal)
|
||||
self.kerberos_auth = reqkerb.HTTPKerberosAuth(
|
||||
mutual_authentication=reqkerb.DISABLED,
|
||||
hostname_override=self.host,
|
||||
principal=self.principal,
|
||||
)
|
||||
if self.kerberos_auth is None:
|
||||
print("failed to obtain kerberos_auth")
|
||||
else:
|
||||
@ -70,7 +91,11 @@ class HDFSApi(object):
|
||||
|
||||
os.environ["KRB5_CONFIG"] = instantiated_krb_conf
|
||||
|
||||
cmd = "(kinit -R -t {keytab} -k {principal} || (sleep 5 && kinit -R -t {keytab} -k {principal})) ; klist".format(instantiated_krb_conf=instantiated_krb_conf, keytab=self.keytab, principal=self.principal)
|
||||
cmd = "(kinit -R -t {keytab} -k {principal} || (sleep 5 && kinit -R -t {keytab} -k {principal})) ; klist".format(
|
||||
instantiated_krb_conf=instantiated_krb_conf,
|
||||
keytab=self.keytab,
|
||||
principal=self.principal,
|
||||
)
|
||||
|
||||
start = time.time()
|
||||
|
||||
@ -79,10 +104,18 @@ class HDFSApi(object):
|
||||
res = subprocess.run(cmd, shell=True)
|
||||
if res.returncode != 0:
|
||||
# check_call(...) from subprocess does not print stderr, so we do it manually
|
||||
logging.debug('Stderr:\n{}\n'.format(res.stderr.decode('utf-8')))
|
||||
logging.debug('Stdout:\n{}\n'.format(res.stdout.decode('utf-8')))
|
||||
logging.debug('Env:\n{}\n'.format(env))
|
||||
raise Exception('Command {} return non-zero code {}: {}'.format(args, res.returncode, res.stderr.decode('utf-8')))
|
||||
logging.debug(
|
||||
"Stderr:\n{}\n".format(res.stderr.decode("utf-8"))
|
||||
)
|
||||
logging.debug(
|
||||
"Stdout:\n{}\n".format(res.stdout.decode("utf-8"))
|
||||
)
|
||||
logging.debug("Env:\n{}\n".format(env))
|
||||
raise Exception(
|
||||
"Command {} return non-zero code {}: {}".format(
|
||||
args, res.returncode, res.stderr.decode("utf-8")
|
||||
)
|
||||
)
|
||||
|
||||
logging.debug("KDC started, kinit successfully run")
|
||||
return
|
||||
@ -97,28 +130,60 @@ class HDFSApi(object):
|
||||
for i in range(0, cnt):
|
||||
logging.debug(f"CALL: {str(kwargs)}")
|
||||
response_data = func(**kwargs)
|
||||
logging.debug(f"response_data:{response_data.content} headers:{response_data.headers}")
|
||||
logging.debug(
|
||||
f"response_data:{response_data.content} headers:{response_data.headers}"
|
||||
)
|
||||
if response_data.status_code == expected_code:
|
||||
return response_data
|
||||
else:
|
||||
logging.error(f"unexpected response_data.status_code {response_data.status_code} != {expected_code}")
|
||||
logging.error(
|
||||
f"unexpected response_data.status_code {response_data.status_code} != {expected_code}"
|
||||
)
|
||||
time.sleep(1)
|
||||
response_data.raise_for_status()
|
||||
|
||||
|
||||
def read_data(self, path, universal_newlines=True):
|
||||
logging.debug("read_data protocol:{} host:{} ip:{} proxy port:{} data port:{} path: {}".format(self.protocol, self.host, self.hdfs_ip, self.proxy_port, self.data_port, path))
|
||||
response = self.req_wrapper(requests.get, 307, url="{protocol}://{ip}:{port}/webhdfs/v1{path}?op=OPEN".format(protocol=self.protocol, ip=self.hdfs_ip, port=self.proxy_port, path=path), headers={'host': str(self.hdfs_ip)}, allow_redirects=False, verify=False, auth=self.kerberos_auth)
|
||||
logging.debug(
|
||||
"read_data protocol:{} host:{} ip:{} proxy port:{} data port:{} path: {}".format(
|
||||
self.protocol,
|
||||
self.host,
|
||||
self.hdfs_ip,
|
||||
self.proxy_port,
|
||||
self.data_port,
|
||||
path,
|
||||
)
|
||||
)
|
||||
response = self.req_wrapper(
|
||||
requests.get,
|
||||
307,
|
||||
url="{protocol}://{ip}:{port}/webhdfs/v1{path}?op=OPEN".format(
|
||||
protocol=self.protocol, ip=self.hdfs_ip, port=self.proxy_port, path=path
|
||||
),
|
||||
headers={"host": str(self.hdfs_ip)},
|
||||
allow_redirects=False,
|
||||
verify=False,
|
||||
auth=self.kerberos_auth,
|
||||
)
|
||||
# additional_params = '&'.join(response.headers['Location'].split('&')[1:2])
|
||||
location = None
|
||||
if self.kerberized:
|
||||
location = response.headers['Location'].replace("kerberizedhdfs1:1006", "{}:{}".format(self.hdfs_ip, self.data_port))
|
||||
location = response.headers["Location"].replace(
|
||||
"kerberizedhdfs1:1006", "{}:{}".format(self.hdfs_ip, self.data_port)
|
||||
)
|
||||
else:
|
||||
location = response.headers['Location'].replace("hdfs1:50075", "{}:{}".format(self.hdfs_ip, self.data_port))
|
||||
location = response.headers["Location"].replace(
|
||||
"hdfs1:50075", "{}:{}".format(self.hdfs_ip, self.data_port)
|
||||
)
|
||||
logging.debug("redirected to {}".format(location))
|
||||
|
||||
response_data = self.req_wrapper(requests.get, 200, url=location, headers={'host': self.hdfs_ip},
|
||||
verify=False, auth=self.kerberos_auth)
|
||||
response_data = self.req_wrapper(
|
||||
requests.get,
|
||||
200,
|
||||
url=location,
|
||||
headers={"host": self.hdfs_ip},
|
||||
verify=False,
|
||||
auth=self.kerberos_auth,
|
||||
)
|
||||
|
||||
if universal_newlines:
|
||||
return response_data.text
|
||||
@ -126,23 +191,38 @@ class HDFSApi(object):
|
||||
return response_data.content
|
||||
|
||||
def write_data(self, path, content):
|
||||
logging.debug("write_data protocol:{} host:{} port:{} path: {} user:{}, principal:{}".format(
|
||||
self.protocol, self.host, self.proxy_port, path, self.user, self.principal))
|
||||
named_file = NamedTemporaryFile(mode='wb+')
|
||||
logging.debug(
|
||||
"write_data protocol:{} host:{} port:{} path: {} user:{}, principal:{}".format(
|
||||
self.protocol,
|
||||
self.host,
|
||||
self.proxy_port,
|
||||
path,
|
||||
self.user,
|
||||
self.principal,
|
||||
)
|
||||
)
|
||||
named_file = NamedTemporaryFile(mode="wb+")
|
||||
fpath = named_file.name
|
||||
if isinstance(content, str):
|
||||
content = content.encode()
|
||||
named_file.write(content)
|
||||
named_file.flush()
|
||||
|
||||
response = self.req_wrapper(requests.put, 307,
|
||||
url="{protocol}://{ip}:{port}/webhdfs/v1{path}?op=CREATE".format(protocol=self.protocol, ip=self.hdfs_ip,
|
||||
port=self.proxy_port,
|
||||
path=path, user=self.user),
|
||||
response = self.req_wrapper(
|
||||
requests.put,
|
||||
307,
|
||||
url="{protocol}://{ip}:{port}/webhdfs/v1{path}?op=CREATE".format(
|
||||
protocol=self.protocol,
|
||||
ip=self.hdfs_ip,
|
||||
port=self.proxy_port,
|
||||
path=path,
|
||||
user=self.user,
|
||||
),
|
||||
allow_redirects=False,
|
||||
headers={'host': str(self.hdfs_ip)},
|
||||
params={'overwrite' : 'true'},
|
||||
verify=False, auth=self.kerberos_auth
|
||||
headers={"host": str(self.hdfs_ip)},
|
||||
params={"overwrite": "true"},
|
||||
verify=False,
|
||||
auth=self.kerberos_auth,
|
||||
)
|
||||
|
||||
logging.debug("HDFS api response:{}".format(response.headers))
|
||||
@ -150,23 +230,30 @@ class HDFSApi(object):
|
||||
# additional_params = '&'.join(
|
||||
# response.headers['Location'].split('&')[1:2] + ["user.name={}".format(self.user), "overwrite=true"])
|
||||
if self.kerberized:
|
||||
location = response.headers['Location'].replace("kerberizedhdfs1:1006", "{}:{}".format(self.hdfs_ip, self.data_port))
|
||||
location = response.headers["Location"].replace(
|
||||
"kerberizedhdfs1:1006", "{}:{}".format(self.hdfs_ip, self.data_port)
|
||||
)
|
||||
else:
|
||||
location = response.headers['Location'].replace("hdfs1:50075", "{}:{}".format(self.hdfs_ip, self.data_port))
|
||||
location = response.headers["Location"].replace(
|
||||
"hdfs1:50075", "{}:{}".format(self.hdfs_ip, self.data_port)
|
||||
)
|
||||
|
||||
with open(fpath, mode="rb") as fh:
|
||||
file_data = fh.read()
|
||||
protocol = "http" # self.protocol
|
||||
response = self.req_wrapper(requests.put, 201,
|
||||
protocol = "http" # self.protocol
|
||||
response = self.req_wrapper(
|
||||
requests.put,
|
||||
201,
|
||||
url="{location}".format(location=location),
|
||||
data=file_data,
|
||||
headers={'content-type':'text/plain', 'host': str(self.hdfs_ip)},
|
||||
params={'file': path, 'user.name' : self.user},
|
||||
allow_redirects=False, verify=False, auth=self.kerberos_auth
|
||||
headers={"content-type": "text/plain", "host": str(self.hdfs_ip)},
|
||||
params={"file": path, "user.name": self.user},
|
||||
allow_redirects=False,
|
||||
verify=False,
|
||||
auth=self.kerberos_auth,
|
||||
)
|
||||
logging.debug(f"{response.content} {response.headers}")
|
||||
|
||||
|
||||
def write_gzip_data(self, path, content):
|
||||
if isinstance(content, str):
|
||||
content = content.encode()
|
||||
@ -176,4 +263,10 @@ class HDFSApi(object):
|
||||
self.write_data(path, out.getvalue())
|
||||
|
||||
def read_gzip_data(self, path):
|
||||
return gzip.GzipFile(fileobj=io.BytesIO(self.read_data(path, universal_newlines=False))).read().decode()
|
||||
return (
|
||||
gzip.GzipFile(
|
||||
fileobj=io.BytesIO(self.read_data(path, universal_newlines=False))
|
||||
)
|
||||
.read()
|
||||
.decode()
|
||||
)
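
# Illustrative sketch, not part of this commit: the gzip round trip used by
# write_gzip_data()/read_gzip_data() above, without the HDFS transport. Compress into a
# BytesIO buffer, then decompress and decode.
import gzip
import io

payload = "some data"

out = io.BytesIO()
with gzip.GzipFile(fileobj=out, mode="wb") as f:
    f.write(payload.encode())
compressed = out.getvalue()

restored = gzip.GzipFile(fileobj=io.BytesIO(compressed)).read().decode()
assert restored == payload
print(restored)
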
|
||||
|
@ -9,9 +9,14 @@ from http.server import BaseHTTPRequestHandler, HTTPServer
# Decorator used to check that authentication works for external dictionaries that use an HTTP source.
def check_auth(fn):
def wrapper(req):
auth_header = req.headers.get('authorization', None)
api_key = req.headers.get('api-key', None)
if not auth_header or auth_header != 'Basic Zm9vOmJhcg==' or not api_key or api_key != 'secret':
auth_header = req.headers.get("authorization", None)
api_key = req.headers.get("api-key", None)
if (
not auth_header
or auth_header != "Basic Zm9vOmJhcg=="
or not api_key
or api_key != "secret"
):
req.send_response(401)
else:
fn(req)
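
# Illustrative sketch, not part of this commit: how the check_auth decorator above
# behaves, exercised with a minimal fake request object instead of a real
# BaseHTTPRequestHandler. The decorator is re-declared here, including the
# "return wrapper" line that sits outside the visible hunk, so the snippet is
# self-contained; the credentials mirror the test values above.
def check_auth(fn):
    def wrapper(req):
        auth_header = req.headers.get("authorization", None)
        api_key = req.headers.get("api-key", None)
        if (
            not auth_header
            or auth_header != "Basic Zm9vOmJhcg=="
            or not api_key
            or api_key != "secret"
        ):
            req.send_response(401)
        else:
            fn(req)

    return wrapper

class FakeRequest:
    def __init__(self, headers):
        self.headers = headers
        self.status = None

    def send_response(self, code):
        self.status = code

@check_auth
def handler(req):
    req.send_response(200)

ok = FakeRequest({"authorization": "Basic Zm9vOmJhcg==", "api-key": "secret"})
denied = FakeRequest({})
handler(ok)
handler(denied)
print(ok.status, denied.status)  # 200 401
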
@ -35,15 +40,15 @@ def start_server(server_address, data_path, schema, cert_path, address_family):
|
||||
|
||||
def __send_headers(self):
|
||||
self.send_response(200)
|
||||
self.send_header('Content-type', 'text/tsv')
|
||||
self.send_header("Content-type", "text/tsv")
|
||||
self.end_headers()
|
||||
|
||||
def __send_data(self, only_ids=None):
|
||||
with open(data_path, 'r') as fl:
|
||||
reader = csv.reader(fl, delimiter='\t')
|
||||
with open(data_path, "r") as fl:
|
||||
reader = csv.reader(fl, delimiter="\t")
|
||||
for row in reader:
|
||||
if not only_ids or (row[0] in only_ids):
|
||||
self.wfile.write(('\t'.join(row) + '\n').encode())
|
||||
self.wfile.write(("\t".join(row) + "\n").encode())
|
||||
|
||||
def __read_and_decode_post_ids(self):
|
||||
data = self.__read_and_decode_post_data()
|
||||
@ -51,7 +56,7 @@ def start_server(server_address, data_path, schema, cert_path, address_family):
|
||||
|
||||
def __read_and_decode_post_data(self):
|
||||
transfer_encoding = self.headers.get("Transfer-encoding")
|
||||
decoded = "";
|
||||
decoded = ""
|
||||
if transfer_encoding == "chunked":
|
||||
while True:
|
||||
s = self.rfile.readline().decode()
|
||||
@ -69,19 +74,29 @@ def start_server(server_address, data_path, schema, cert_path, address_family):
|
||||
HTTPServer.address_family = socket.AF_INET6
|
||||
httpd = HTTPServer(server_address, TSVHTTPHandler)
|
||||
if schema == "https":
|
||||
httpd.socket = ssl.wrap_socket(httpd.socket, certfile=cert_path, server_side=True)
|
||||
httpd.socket = ssl.wrap_socket(
|
||||
httpd.socket, certfile=cert_path, server_side=True
|
||||
)
|
||||
httpd.serve_forever()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Simple HTTP server returns data from file")
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Simple HTTP server returns data from file"
|
||||
)
|
||||
parser.add_argument("--host", default="localhost")
|
||||
parser.add_argument("--port", default=5555, type=int)
|
||||
parser.add_argument("--data-path", required=True)
|
||||
parser.add_argument("--schema", choices=("http", "https"), required=True)
|
||||
parser.add_argument("--cert-path", default="./fake_cert.pem")
|
||||
parser.add_argument('--address-family', choices=("ipv4", "ipv6"), default="ipv4")
|
||||
parser.add_argument("--address-family", choices=("ipv4", "ipv6"), default="ipv4")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
start_server((args.host, args.port), args.data_path, args.schema, args.cert_path, args.address_family)
|
||||
start_server(
|
||||
(args.host, args.port),
|
||||
args.data_path,
|
||||
args.schema,
|
||||
args.cert_path,
|
||||
args.address_family,
|
||||
)
|
||||
|
@ -22,26 +22,38 @@ class PartitionManager:
|
||||
self._netem_delayed_instances = []
|
||||
_NetworkManager.get()
|
||||
|
||||
def drop_instance_zk_connections(self, instance, action='DROP'):
|
||||
def drop_instance_zk_connections(self, instance, action="DROP"):
|
||||
self._check_instance(instance)
|
||||
|
||||
self._add_rule({'source': instance.ip_address, 'destination_port': 2181, 'action': action})
|
||||
self._add_rule({'destination': instance.ip_address, 'source_port': 2181, 'action': action})
|
||||
self._add_rule(
|
||||
{"source": instance.ip_address, "destination_port": 2181, "action": action}
|
||||
)
|
||||
self._add_rule(
|
||||
{"destination": instance.ip_address, "source_port": 2181, "action": action}
|
||||
)
|
||||
|
||||
def restore_instance_zk_connections(self, instance, action='DROP'):
|
||||
def restore_instance_zk_connections(self, instance, action="DROP"):
|
||||
self._check_instance(instance)
|
||||
|
||||
self._delete_rule({'source': instance.ip_address, 'destination_port': 2181, 'action': action})
|
||||
self._delete_rule({'destination': instance.ip_address, 'source_port': 2181, 'action': action})
|
||||
self._delete_rule(
|
||||
{"source": instance.ip_address, "destination_port": 2181, "action": action}
|
||||
)
|
||||
self._delete_rule(
|
||||
{"destination": instance.ip_address, "source_port": 2181, "action": action}
|
||||
)
|
||||
|
||||
def partition_instances(self, left, right, port=None, action='DROP'):
|
||||
def partition_instances(self, left, right, port=None, action="DROP"):
|
||||
self._check_instance(left)
|
||||
self._check_instance(right)
|
||||
|
||||
def create_rule(src, dst):
|
||||
rule = {'source': src.ip_address, 'destination': dst.ip_address, 'action': action}
|
||||
rule = {
|
||||
"source": src.ip_address,
|
||||
"destination": dst.ip_address,
|
||||
"action": action,
|
||||
}
|
||||
if port is not None:
|
||||
rule['destination_port'] = port
|
||||
rule["destination_port"] = port
|
||||
return rule
|
||||
|
||||
self._add_rule(create_rule(left, right))
|
||||
@ -57,7 +69,9 @@ class PartitionManager:

        while self._netem_delayed_instances:
            instance = self._netem_delayed_instances.pop()
            instance.exec_in_container(["bash", "-c", "tc qdisc del dev eth0 root netem"], user="root")
            instance.exec_in_container(
                ["bash", "-c", "tc qdisc del dev eth0 root netem"], user="root"
            )

    def pop_rules(self):
        res = self._iptables_rules[:]
@ -71,7 +85,7 @@ class PartitionManager:
    @staticmethod
    def _check_instance(instance):
        if instance.ip_address is None:
            raise Exception('Instance + ' + instance.name + ' is not launched!')
            raise Exception("Instance + " + instance.name + " is not launched!")

    def _add_rule(self, rule):
        _NetworkManager.get().add_iptables_rule(**rule)
@ -82,7 +96,14 @@ class PartitionManager:
        self._iptables_rules.remove(rule)

    def _add_tc_netem_delay(self, instance, delay_ms):
        instance.exec_in_container(["bash", "-c", "tc qdisc add dev eth0 root netem delay {}ms".format(delay_ms)], user="root")
        instance.exec_in_container(
            [
                "bash",
                "-c",
                "tc qdisc add dev eth0 root netem delay {}ms".format(delay_ms),
            ],
            user="root",
        )
        self._netem_delayed_instances.append(instance)

    def __enter__(self):
@ -127,12 +148,12 @@ class _NetworkManager:
        return cls._instance

    def add_iptables_rule(self, **kwargs):
        cmd = ['iptables', '--wait', '-I', 'DOCKER-USER', '1']
        cmd = ["iptables", "--wait", "-I", "DOCKER-USER", "1"]
        cmd.extend(self._iptables_cmd_suffix(**kwargs))
        self._exec_run(cmd, privileged=True)

    def delete_iptables_rule(self, **kwargs):
        cmd = ['iptables', '--wait', '-D', 'DOCKER-USER']
        cmd = ["iptables", "--wait", "-D", "DOCKER-USER"]
        cmd.extend(self._iptables_cmd_suffix(**kwargs))
        self._exec_run(cmd, privileged=True)

@ -144,40 +165,66 @@ class _NetworkManager:
            res = subprocess.run("iptables --wait -D DOCKER-USER 1", shell=True)

            if res.returncode != 0:
                logging.info("All iptables rules cleared, " + str(iptables_iter) + " iterations, last error: " + str(res.stderr))
                logging.info(
                    "All iptables rules cleared, "
                    + str(iptables_iter)
                    + " iterations, last error: "
                    + str(res.stderr)
                )
                return
    @staticmethod
    def _iptables_cmd_suffix(
            source=None, destination=None,
            source_port=None, destination_port=None,
            action=None, probability=None, custom_args=None):
        source=None,
        destination=None,
        source_port=None,
        destination_port=None,
        action=None,
        probability=None,
        custom_args=None,
    ):
        ret = []
        if probability is not None:
            ret.extend(['-m', 'statistic', '--mode', 'random', '--probability', str(probability)])
        ret.extend(['-p', 'tcp'])
            ret.extend(
                [
                    "-m",
                    "statistic",
                    "--mode",
                    "random",
                    "--probability",
                    str(probability),
                ]
            )
        ret.extend(["-p", "tcp"])
        if source is not None:
            ret.extend(['-s', source])
            ret.extend(["-s", source])
        if destination is not None:
            ret.extend(['-d', destination])
            ret.extend(["-d", destination])
        if source_port is not None:
            ret.extend(['--sport', str(source_port)])
            ret.extend(["--sport", str(source_port)])
        if destination_port is not None:
            ret.extend(['--dport', str(destination_port)])
            ret.extend(["--dport", str(destination_port)])
        if action is not None:
            ret.extend(['-j'] + action.split())
            ret.extend(["-j"] + action.split())
        if custom_args is not None:
            ret.extend(custom_args)
        return ret
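To illustrate the helper above: for one sample rule the suffix builder emits the iptables arguments in the order seen in the code (probability match first, then protocol, addresses, ports and target); illustration only, values are made up:

suffix = _NetworkManager._iptables_cmd_suffix(
    source="10.5.0.2", destination_port=2181, action="DROP"
)
# suffix == ["-p", "tcp", "-s", "10.5.0.2", "--dport", "2181", "-j", "DROP"]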
    def __init__(
        self,
        container_expire_timeout=50, container_exit_timeout=60, docker_api_version=os.environ.get("DOCKER_API_VERSION")):
        self,
        container_expire_timeout=50,
        container_exit_timeout=60,
        docker_api_version=os.environ.get("DOCKER_API_VERSION"),
    ):

        self.container_expire_timeout = container_expire_timeout
        self.container_exit_timeout = container_exit_timeout

        self._docker_client = docker.DockerClient(base_url='unix:///var/run/docker.sock', version=docker_api_version, timeout=600)
        self._docker_client = docker.DockerClient(
            base_url="unix:///var/run/docker.sock",
            version=docker_api_version,
            timeout=600,
        )

        self._container = None
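A sketch of how the manager is meant to be driven; get() is the singleton accessor from the earlier hunk, and the addresses below are made up:

manager = _NetworkManager.get()
rule = {"source": "10.5.0.2", "destination_port": 9000, "action": "REJECT"}
manager.add_iptables_rule(**rule)     # runs iptables inside the helper container
manager.delete_iptables_rule(**rule)  # removes the same rule again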
@ -194,29 +241,41 @@ class _NetworkManager:
|
||||
except docker.errors.NotFound:
|
||||
break
|
||||
except Exception as ex:
|
||||
print("Error removing network blocade container, will try again", str(ex))
|
||||
print(
|
||||
"Error removing network blocade container, will try again",
|
||||
str(ex),
|
||||
)
|
||||
time.sleep(i)
|
||||
|
||||
image = subprocess.check_output("docker images -q clickhouse/integration-helper 2>/dev/null", shell=True)
|
||||
image = subprocess.check_output(
|
||||
"docker images -q clickhouse/integration-helper 2>/dev/null", shell=True
|
||||
)
|
||||
if not image.strip():
|
||||
print("No network image helper, will try download")
|
||||
# for some reason docker api may hang if image doesn't exist, so we download it
|
||||
# before running
|
||||
for i in range(5):
|
||||
try:
|
||||
subprocess.check_call("docker pull clickhouse/integration-helper", shell=True) # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL
|
||||
subprocess.check_call( # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL
|
||||
"docker pull clickhouse/integration-helper", shell=True
|
||||
)
|
||||
break
|
||||
except:
|
||||
time.sleep(i)
|
||||
else:
|
||||
raise Exception("Cannot pull clickhouse/integration-helper image")
|
||||
|
||||
self._container = self._docker_client.containers.run('clickhouse/integration-helper',
|
||||
auto_remove=True,
|
||||
command=('sleep %s' % self.container_exit_timeout),
|
||||
# /run/xtables.lock passed inside for correct iptables --wait
|
||||
volumes={'/run/xtables.lock': {'bind': '/run/xtables.lock', 'mode': 'ro' }},
|
||||
detach=True, network_mode='host')
|
||||
self._container = self._docker_client.containers.run(
|
||||
"clickhouse/integration-helper",
|
||||
auto_remove=True,
|
||||
command=("sleep %s" % self.container_exit_timeout),
|
||||
# /run/xtables.lock passed inside for correct iptables --wait
|
||||
volumes={
|
||||
"/run/xtables.lock": {"bind": "/run/xtables.lock", "mode": "ro"}
|
||||
},
|
||||
detach=True,
|
||||
network_mode="host",
|
||||
)
|
||||
container_id = self._container.id
|
||||
self._container_expire_time = time.time() + self.container_expire_timeout
|
||||
|
||||
@ -233,8 +292,8 @@ class _NetworkManager:
|
||||
container = self._ensure_container()
|
||||
|
||||
handle = self._docker_client.api.exec_create(container.id, cmd, **kwargs)
|
||||
output = self._docker_client.api.exec_start(handle).decode('utf8')
|
||||
exit_code = self._docker_client.api.exec_inspect(handle)['ExitCode']
|
||||
output = self._docker_client.api.exec_start(handle).decode("utf8")
|
||||
exit_code = self._docker_client.api.exec_inspect(handle)["ExitCode"]
|
||||
|
||||
if exit_code != 0:
|
||||
print(output)
|
||||
@ -242,30 +301,56 @@ class _NetworkManager:
|
||||
|
||||
return output
|
||||
|
||||
|
||||
# Approximately mesure network I/O speed for interface
|
||||
class NetThroughput(object):
|
||||
def __init__(self, node):
|
||||
self.node = node
|
||||
# trying to get default interface and check it in /proc/net/dev
|
||||
self.interface = self.node.exec_in_container(["bash", "-c", "awk '{print $1 \" \" $2}' /proc/net/route | grep 00000000 | awk '{print $1}'"]).strip()
|
||||
check = self.node.exec_in_container(["bash", "-c", f'grep "^ *{self.interface}:" /proc/net/dev']).strip()
|
||||
if not check: # if check is not successful just try eth{1-10}
|
||||
self.interface = self.node.exec_in_container(
|
||||
[
|
||||
"bash",
|
||||
"-c",
|
||||
"awk '{print $1 \" \" $2}' /proc/net/route | grep 00000000 | awk '{print $1}'",
|
||||
]
|
||||
).strip()
|
||||
check = self.node.exec_in_container(
|
||||
["bash", "-c", f'grep "^ *{self.interface}:" /proc/net/dev']
|
||||
).strip()
|
||||
if not check: # if check is not successful just try eth{1-10}
|
||||
for i in range(10):
|
||||
try:
|
||||
self.interface = self.node.exec_in_container(["bash", "-c", f"awk '{{print $1}}' /proc/net/route | grep 'eth{i}'"]).strip()
|
||||
self.interface = self.node.exec_in_container(
|
||||
[
|
||||
"bash",
|
||||
"-c",
|
||||
f"awk '{{print $1}}' /proc/net/route | grep 'eth{i}'",
|
||||
]
|
||||
).strip()
|
||||
break
|
||||
except Exception as ex:
|
||||
print(f"No interface eth{i}")
|
||||
else:
|
||||
raise Exception("No interface eth{1-10} and default interface not specified in /proc/net/route, maybe some special network configuration")
|
||||
raise Exception(
|
||||
"No interface eth{1-10} and default interface not specified in /proc/net/route, maybe some special network configuration"
|
||||
)
|
||||
|
||||
try:
|
||||
check = self.node.exec_in_container(["bash", "-c", f'grep "^ *{self.interface}:" /proc/net/dev']).strip()
|
||||
check = self.node.exec_in_container(
|
||||
["bash", "-c", f'grep "^ *{self.interface}:" /proc/net/dev']
|
||||
).strip()
|
||||
if not check:
|
||||
raise Exception(f"No such interface {self.interface} found in /proc/net/dev")
|
||||
raise Exception(
|
||||
f"No such interface {self.interface} found in /proc/net/dev"
|
||||
)
|
||||
except:
|
||||
logging.error("All available interfaces %s", self.node.exec_in_container(["bash", "-c", "cat /proc/net/dev"]))
|
||||
raise Exception(f"No such interface {self.interface} found in /proc/net/dev")
|
||||
logging.error(
|
||||
"All available interfaces %s",
|
||||
self.node.exec_in_container(["bash", "-c", "cat /proc/net/dev"]),
|
||||
)
|
||||
raise Exception(
|
||||
f"No such interface {self.interface} found in /proc/net/dev"
|
||||
)
|
||||
|
||||
self.current_in = self._get_in_bytes()
|
||||
self.current_out = self._get_out_bytes()
|
||||
@ -273,27 +358,47 @@ class NetThroughput(object):
|
||||
|
||||
def _get_in_bytes(self):
|
||||
try:
|
||||
result = self.node.exec_in_container(['bash', '-c', f'awk "/^ *{self.interface}:/"\' {{ if ($1 ~ /.*:[0-9][0-9]*/) {{ sub(/^.*:/, "") ; print $1 }} else {{ print $2 }} }}\' /proc/net/dev'])
|
||||
result = self.node.exec_in_container(
|
||||
[
|
||||
"bash",
|
||||
"-c",
|
||||
f'awk "/^ *{self.interface}:/"\' {{ if ($1 ~ /.*:[0-9][0-9]*/) {{ sub(/^.*:/, "") ; print $1 }} else {{ print $2 }} }}\' /proc/net/dev',
|
||||
]
|
||||
)
|
||||
except:
|
||||
raise Exception(f"Cannot receive in bytes from /proc/net/dev for interface {self.interface}")
|
||||
raise Exception(
|
||||
f"Cannot receive in bytes from /proc/net/dev for interface {self.interface}"
|
||||
)
|
||||
|
||||
try:
|
||||
return int(result)
|
||||
except:
|
||||
raise Exception(f"Got non-numeric in bytes '{result}' from /proc/net/dev for interface {self.interface}")
|
||||
raise Exception(
|
||||
f"Got non-numeric in bytes '{result}' from /proc/net/dev for interface {self.interface}"
|
||||
)
|
||||
|
||||
def _get_out_bytes(self):
|
||||
try:
|
||||
result = self.node.exec_in_container(['bash', '-c', f'awk "/^ *{self.interface}:/"\' {{ if ($1 ~ /.*:[0-9][0-9]*/) {{ print $9 }} else {{ print $10 }} }}\' /proc/net/dev'])
|
||||
result = self.node.exec_in_container(
|
||||
[
|
||||
"bash",
|
||||
"-c",
|
||||
f"awk \"/^ *{self.interface}:/\"' {{ if ($1 ~ /.*:[0-9][0-9]*/) {{ print $9 }} else {{ print $10 }} }}' /proc/net/dev",
|
||||
]
|
||||
)
|
||||
except:
|
||||
raise Exception(f"Cannot receive out bytes from /proc/net/dev for interface {self.interface}")
|
||||
raise Exception(
|
||||
f"Cannot receive out bytes from /proc/net/dev for interface {self.interface}"
|
||||
)
|
||||
|
||||
try:
|
||||
return int(result)
|
||||
except:
|
||||
raise Exception(f"Got non-numeric out bytes '{result}' from /proc/net/dev for interface {self.interface}")
|
||||
raise Exception(
|
||||
f"Got non-numeric out bytes '{result}' from /proc/net/dev for interface {self.interface}"
|
||||
)
|
||||
|
||||
def measure_speed(self, measure='bytes'):
|
||||
def measure_speed(self, measure="bytes"):
|
||||
new_in = self._get_in_bytes()
|
||||
new_out = self._get_out_bytes()
|
||||
current_time = time.time()
|
||||
@ -304,11 +409,11 @@ class NetThroughput(object):
|
||||
self.current_in = new_in
|
||||
self.measure_time = current_time
|
||||
|
||||
if measure == 'bytes':
|
||||
if measure == "bytes":
|
||||
return in_speed, out_speed
|
||||
elif measure == 'kilobytes':
|
||||
return in_speed / 1024., out_speed / 1024.
|
||||
elif measure == 'megabytes':
|
||||
elif measure == "kilobytes":
|
||||
return in_speed / 1024.0, out_speed / 1024.0
|
||||
elif measure == "megabytes":
|
||||
return in_speed / (1024 * 1024), out_speed / (1024 * 1024)
|
||||
else:
|
||||
raise Exception(f"Unknown measure {measure}")
|
||||
|
@ -23,11 +23,21 @@ postgres_table_template_5 = """
    key Integer NOT NULL, value UUID, PRIMARY KEY(key))
    """

def get_postgres_conn(ip, port, database=False, auto_commit=True, database_name='postgres_database', replication=False):

def get_postgres_conn(
    ip,
    port,
    database=False,
    auto_commit=True,
    database_name="postgres_database",
    replication=False,
):
    if database == True:
        conn_string = f"host={ip} port={port} dbname='{database_name}' user='postgres' password='mysecretpassword'"
    else:
        conn_string = f"host={ip} port={port} user='postgres' password='mysecretpassword'"
        conn_string = (
            f"host={ip} port={port} user='postgres' password='mysecretpassword'"
        )

    if replication:
        conn_string += " replication='database'"
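A hedged example of calling the reformatted helper; the postgres_ip and postgres_port attributes are assumed to come from the ClickHouseCluster fixture:

conn = get_postgres_conn(
    ip=cluster.postgres_ip, port=cluster.postgres_port, database=True
)
cursor = conn.cursor()
cursor.execute("SELECT 1")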
@ -38,33 +48,41 @@ def get_postgres_conn(ip, port, database=False, auto_commit=True, database_name=
|
||||
conn.autocommit = True
|
||||
return conn
|
||||
|
||||
def create_replication_slot(conn, slot_name='user_slot'):
|
||||
|
||||
def create_replication_slot(conn, slot_name="user_slot"):
|
||||
cursor = conn.cursor()
|
||||
cursor.execute(f'CREATE_REPLICATION_SLOT {slot_name} LOGICAL pgoutput EXPORT_SNAPSHOT')
|
||||
cursor.execute(
|
||||
f"CREATE_REPLICATION_SLOT {slot_name} LOGICAL pgoutput EXPORT_SNAPSHOT"
|
||||
)
|
||||
result = cursor.fetchall()
|
||||
print(result[0][0]) # slot name
|
||||
print(result[0][1]) # start lsn
|
||||
print(result[0][2]) # snapshot
|
||||
print(result[0][0]) # slot name
|
||||
print(result[0][1]) # start lsn
|
||||
print(result[0][2]) # snapshot
|
||||
return result[0][2]
|
||||
|
||||
def drop_replication_slot(conn, slot_name='user_slot'):
|
||||
|
||||
def drop_replication_slot(conn, slot_name="user_slot"):
|
||||
cursor = conn.cursor()
|
||||
cursor.execute(f"select pg_drop_replication_slot('{slot_name}')")
|
||||
|
||||
|
||||
def create_postgres_schema(cursor, schema_name):
|
||||
drop_postgres_schema(cursor, schema_name)
|
||||
cursor.execute(f'CREATE SCHEMA {schema_name}')
|
||||
cursor.execute(f"CREATE SCHEMA {schema_name}")
|
||||
|
||||
|
||||
def drop_postgres_schema(cursor, schema_name):
|
||||
cursor.execute(f'DROP SCHEMA IF EXISTS {schema_name} CASCADE')
|
||||
cursor.execute(f"DROP SCHEMA IF EXISTS {schema_name} CASCADE")
|
||||
|
||||
|
||||
def create_postgres_table(cursor, table_name, replica_identity_full=False, template=postgres_table_template):
|
||||
def create_postgres_table(
|
||||
cursor, table_name, replica_identity_full=False, template=postgres_table_template
|
||||
):
|
||||
drop_postgres_table(cursor, table_name)
|
||||
cursor.execute(template.format(table_name))
|
||||
if replica_identity_full:
|
||||
cursor.execute(f'ALTER TABLE {table_name} REPLICA IDENTITY FULL;')
|
||||
cursor.execute(f"ALTER TABLE {table_name} REPLICA IDENTITY FULL;")
|
||||
|
||||
|
||||
def drop_postgres_table(cursor, table_name):
|
||||
cursor.execute(f"""DROP TABLE IF EXISTS "{table_name}" """)
|
||||
@ -74,6 +92,7 @@ def create_postgres_table_with_schema(cursor, schema_name, table_name):
|
||||
drop_postgres_table_with_schema(cursor, schema_name, table_name)
|
||||
cursor.execute(postgres_table_template_4.format(schema_name, table_name))
|
||||
|
||||
|
||||
def drop_postgres_table_with_schema(cursor, schema_name, table_name):
|
||||
cursor.execute(f"""DROP TABLE IF EXISTS "{schema_name}"."{table_name}" """)
|
||||
|
||||
@ -102,14 +121,14 @@ class PostgresManager:
|
||||
def prepare(self):
|
||||
conn = get_postgres_conn(ip=self.ip, port=self.port)
|
||||
cursor = conn.cursor()
|
||||
self.create_postgres_db(cursor, 'postgres_database')
|
||||
self.create_postgres_db(cursor, "postgres_database")
|
||||
self.create_clickhouse_postgres_db(ip=self.ip, port=self.port)
|
||||
|
||||
def clear(self):
|
||||
if self.conn.closed == 0:
|
||||
self.conn.close()
|
||||
for db in self.created_materialized_postgres_db_list.copy():
|
||||
self.drop_materialized_db(db);
|
||||
self.drop_materialized_db(db)
|
||||
for db in self.created_ch_postgres_db_list.copy():
|
||||
self.drop_clickhouse_postgres_db(db)
|
||||
if len(self.created_postgres_db_list) > 0:
|
||||
@ -122,38 +141,54 @@ class PostgresManager:
|
||||
self.conn = get_postgres_conn(ip=self.ip, port=self.port, database=True)
|
||||
return self.conn.cursor()
|
||||
|
||||
def create_postgres_db(self, cursor, name='postgres_database'):
|
||||
def create_postgres_db(self, cursor, name="postgres_database"):
|
||||
self.drop_postgres_db(cursor, name)
|
||||
self.created_postgres_db_list.add(name)
|
||||
cursor.execute(f"CREATE DATABASE {name}")
|
||||
|
||||
def drop_postgres_db(self, cursor, name='postgres_database'):
|
||||
def drop_postgres_db(self, cursor, name="postgres_database"):
|
||||
cursor.execute(f"DROP DATABASE IF EXISTS {name}")
|
||||
if name in self.created_postgres_db_list:
|
||||
self.created_postgres_db_list.remove(name)
|
||||
|
||||
def create_clickhouse_postgres_db(self, ip, port, name='postgres_database', database_name='postgres_database', schema_name=''):
|
||||
def create_clickhouse_postgres_db(
|
||||
self,
|
||||
ip,
|
||||
port,
|
||||
name="postgres_database",
|
||||
database_name="postgres_database",
|
||||
schema_name="",
|
||||
):
|
||||
self.drop_clickhouse_postgres_db(name)
|
||||
self.created_ch_postgres_db_list.add(name)
|
||||
|
||||
if len(schema_name) == 0:
|
||||
self.instance.query(f'''
|
||||
self.instance.query(
|
||||
f"""
|
||||
CREATE DATABASE {name}
|
||||
ENGINE = PostgreSQL('{ip}:{port}', '{database_name}', 'postgres', 'mysecretpassword')''')
|
||||
ENGINE = PostgreSQL('{ip}:{port}', '{database_name}', 'postgres', 'mysecretpassword')"""
|
||||
)
|
||||
else:
|
||||
self.instance.query(f'''
|
||||
self.instance.query(
|
||||
f"""
|
||||
CREATE DATABASE {name}
|
||||
ENGINE = PostgreSQL('{ip}:{port}', '{database_name}', 'postgres', 'mysecretpassword', '{schema_name}')''')
|
||||
ENGINE = PostgreSQL('{ip}:{port}', '{database_name}', 'postgres', 'mysecretpassword', '{schema_name}')"""
|
||||
)
|
||||
|
||||
def drop_clickhouse_postgres_db(self, name='postgres_database'):
|
||||
self.instance.query(f'DROP DATABASE IF EXISTS {name}')
|
||||
def drop_clickhouse_postgres_db(self, name="postgres_database"):
|
||||
self.instance.query(f"DROP DATABASE IF EXISTS {name}")
|
||||
if name in self.created_ch_postgres_db_list:
|
||||
self.created_ch_postgres_db_list.remove(name)
|
||||
|
||||
|
||||
def create_materialized_db(self, ip, port,
|
||||
materialized_database='test_database', postgres_database='postgres_database',
|
||||
settings=[], table_overrides=''):
|
||||
def create_materialized_db(
|
||||
self,
|
||||
ip,
|
||||
port,
|
||||
materialized_database="test_database",
|
||||
postgres_database="postgres_database",
|
||||
settings=[],
|
||||
table_overrides="",
|
||||
):
|
||||
self.created_materialized_postgres_db_list.add(materialized_database)
|
||||
self.instance.query(f"DROP DATABASE IF EXISTS {materialized_database}")
|
||||
|
||||
@ -162,17 +197,17 @@ class PostgresManager:
|
||||
create_query += " SETTINGS "
|
||||
for i in range(len(settings)):
|
||||
if i != 0:
|
||||
create_query += ', '
|
||||
create_query += ", "
|
||||
create_query += settings[i]
|
||||
create_query += table_overrides
|
||||
self.instance.query(create_query)
|
||||
assert materialized_database in self.instance.query('SHOW DATABASES')
|
||||
assert materialized_database in self.instance.query("SHOW DATABASES")
|
||||
|
||||
def drop_materialized_db(self, materialized_database='test_database'):
|
||||
self.instance.query(f'DROP DATABASE IF EXISTS {materialized_database} NO DELAY')
|
||||
def drop_materialized_db(self, materialized_database="test_database"):
|
||||
self.instance.query(f"DROP DATABASE IF EXISTS {materialized_database} NO DELAY")
|
||||
if materialized_database in self.created_materialized_postgres_db_list:
|
||||
self.created_materialized_postgres_db_list.remove(materialized_database)
|
||||
assert materialized_database not in self.instance.query('SHOW DATABASES')
|
||||
assert materialized_database not in self.instance.query("SHOW DATABASES")
|
||||
|
||||
def create_and_fill_postgres_table(self, table_name):
|
||||
conn = get_postgres_conn(ip=self.ip, port=self.port, database=True)
|
||||
@ -180,82 +215,109 @@ class PostgresManager:
|
||||
self.create_and_fill_postgres_table_from_cursor(cursor, table_name)
|
||||
|
||||
def create_and_fill_postgres_table_from_cursor(self, cursor, table_name):
|
||||
create_postgres_table(cursor, table_name);
|
||||
self.instance.query(f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers(50)")
|
||||
create_postgres_table(cursor, table_name)
|
||||
self.instance.query(
|
||||
f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers(50)"
|
||||
)
|
||||
|
||||
def create_and_fill_postgres_tables(self, tables_num, numbers=50):
|
||||
conn = get_postgres_conn(ip=self.ip, port=self.port, database=True)
|
||||
cursor = conn.cursor()
|
||||
self.create_and_fill_postgres_tables_from_cursor(cursor, tables_num, numbers=numbers)
|
||||
self.create_and_fill_postgres_tables_from_cursor(
|
||||
cursor, tables_num, numbers=numbers
|
||||
)
|
||||
|
||||
def create_and_fill_postgres_tables_from_cursor(self, cursor, tables_num, numbers=50):
|
||||
def create_and_fill_postgres_tables_from_cursor(
|
||||
self, cursor, tables_num, numbers=50
|
||||
):
|
||||
for i in range(tables_num):
|
||||
table_name = f'postgresql_replica_{i}'
|
||||
create_postgres_table(cursor, table_name);
|
||||
table_name = f"postgresql_replica_{i}"
|
||||
create_postgres_table(cursor, table_name)
|
||||
if numbers > 0:
|
||||
self.instance.query(f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers({numbers})")
|
||||
self.instance.query(
|
||||
f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers({numbers})"
|
||||
)
|
||||
|
||||
|
||||
queries = [
|
||||
'INSERT INTO postgresql_replica_{} select i, i from generate_series(0, 10000) as t(i);',
|
||||
'DELETE FROM postgresql_replica_{} WHERE (value*value) % 3 = 0;',
|
||||
'UPDATE postgresql_replica_{} SET value = value - 125 WHERE key % 2 = 0;',
|
||||
"INSERT INTO postgresql_replica_{} select i, i from generate_series(0, 10000) as t(i);",
|
||||
"DELETE FROM postgresql_replica_{} WHERE (value*value) % 3 = 0;",
|
||||
"UPDATE postgresql_replica_{} SET value = value - 125 WHERE key % 2 = 0;",
|
||||
"UPDATE postgresql_replica_{} SET key=key+20000 WHERE key%2=0",
|
||||
'INSERT INTO postgresql_replica_{} select i, i from generate_series(40000, 50000) as t(i);',
|
||||
'DELETE FROM postgresql_replica_{} WHERE key % 10 = 0;',
|
||||
'UPDATE postgresql_replica_{} SET value = value + 101 WHERE key % 2 = 1;',
|
||||
"INSERT INTO postgresql_replica_{} select i, i from generate_series(40000, 50000) as t(i);",
|
||||
"DELETE FROM postgresql_replica_{} WHERE key % 10 = 0;",
|
||||
"UPDATE postgresql_replica_{} SET value = value + 101 WHERE key % 2 = 1;",
|
||||
"UPDATE postgresql_replica_{} SET key=key+80000 WHERE key%2=1",
|
||||
'DELETE FROM postgresql_replica_{} WHERE value % 2 = 0;',
|
||||
'UPDATE postgresql_replica_{} SET value = value + 2000 WHERE key % 5 = 0;',
|
||||
'INSERT INTO postgresql_replica_{} select i, i from generate_series(200000, 250000) as t(i);',
|
||||
'DELETE FROM postgresql_replica_{} WHERE value % 3 = 0;',
|
||||
'UPDATE postgresql_replica_{} SET value = value * 2 WHERE key % 3 = 0;',
|
||||
"DELETE FROM postgresql_replica_{} WHERE value % 2 = 0;",
|
||||
"UPDATE postgresql_replica_{} SET value = value + 2000 WHERE key % 5 = 0;",
|
||||
"INSERT INTO postgresql_replica_{} select i, i from generate_series(200000, 250000) as t(i);",
|
||||
"DELETE FROM postgresql_replica_{} WHERE value % 3 = 0;",
|
||||
"UPDATE postgresql_replica_{} SET value = value * 2 WHERE key % 3 = 0;",
|
||||
"UPDATE postgresql_replica_{} SET key=key+500000 WHERE key%2=1",
|
||||
'INSERT INTO postgresql_replica_{} select i, i from generate_series(1000000, 1050000) as t(i);',
|
||||
'DELETE FROM postgresql_replica_{} WHERE value % 9 = 2;',
|
||||
"INSERT INTO postgresql_replica_{} select i, i from generate_series(1000000, 1050000) as t(i);",
|
||||
"DELETE FROM postgresql_replica_{} WHERE value % 9 = 2;",
|
||||
"UPDATE postgresql_replica_{} SET key=key+10000000",
|
||||
'UPDATE postgresql_replica_{} SET value = value + 2 WHERE key % 3 = 1;',
|
||||
'DELETE FROM postgresql_replica_{} WHERE value%5 = 0;'
|
||||
]
|
||||
"UPDATE postgresql_replica_{} SET value = value + 2 WHERE key % 3 = 1;",
|
||||
"DELETE FROM postgresql_replica_{} WHERE value%5 = 0;",
|
||||
]
|
||||
|
||||
|
||||
def assert_nested_table_is_created(instance, table_name, materialized_database='test_database', schema_name=''):
|
||||
def assert_nested_table_is_created(
|
||||
instance, table_name, materialized_database="test_database", schema_name=""
|
||||
):
|
||||
if len(schema_name) == 0:
|
||||
table = table_name
|
||||
else:
|
||||
table = schema_name + "." + table_name
|
||||
|
||||
print(f'Checking table {table} exists in {materialized_database}')
|
||||
database_tables = instance.query(f'SHOW TABLES FROM {materialized_database}')
|
||||
print(f"Checking table {table} exists in {materialized_database}")
|
||||
database_tables = instance.query(f"SHOW TABLES FROM {materialized_database}")
|
||||
|
||||
while table not in database_tables:
|
||||
time.sleep(0.2)
|
||||
database_tables = instance.query(f'SHOW TABLES FROM {materialized_database}')
|
||||
database_tables = instance.query(f"SHOW TABLES FROM {materialized_database}")
|
||||
|
||||
assert(table in database_tables)
|
||||
assert table in database_tables
|
||||
|
||||
|
||||
def assert_number_of_columns(instance, expected, table_name, database_name='test_database'):
|
||||
result = instance.query(f"select count() from system.columns where table = '{table_name}' and database = '{database_name}' and not startsWith(name, '_')")
|
||||
while (int(result) != expected):
|
||||
def assert_number_of_columns(
|
||||
instance, expected, table_name, database_name="test_database"
|
||||
):
|
||||
result = instance.query(
|
||||
f"select count() from system.columns where table = '{table_name}' and database = '{database_name}' and not startsWith(name, '_')"
|
||||
)
|
||||
while int(result) != expected:
|
||||
time.sleep(1)
|
||||
result = instance.query(f"select count() from system.columns where table = '{table_name}' and database = '{database_name}' and not startsWith(name, '_')")
|
||||
print('Number of columns ok')
|
||||
result = instance.query(
|
||||
f"select count() from system.columns where table = '{table_name}' and database = '{database_name}' and not startsWith(name, '_')"
|
||||
)
|
||||
print("Number of columns ok")
|
||||
|
||||
|
||||
def check_tables_are_synchronized(instance, table_name, order_by='key', postgres_database='postgres_database', materialized_database='test_database', schema_name=''):
|
||||
assert_nested_table_is_created(instance, table_name, materialized_database, schema_name)
|
||||
def check_tables_are_synchronized(
|
||||
instance,
|
||||
table_name,
|
||||
order_by="key",
|
||||
postgres_database="postgres_database",
|
||||
materialized_database="test_database",
|
||||
schema_name="",
|
||||
):
|
||||
assert_nested_table_is_created(
|
||||
instance, table_name, materialized_database, schema_name
|
||||
)
|
||||
|
||||
table_path = ''
|
||||
table_path = ""
|
||||
if len(schema_name) == 0:
|
||||
table_path = f'{materialized_database}.{table_name}'
|
||||
table_path = f"{materialized_database}.{table_name}"
|
||||
else:
|
||||
table_path = f'{materialized_database}.`{schema_name}.{table_name}`'
|
||||
table_path = f"{materialized_database}.`{schema_name}.{table_name}`"
|
||||
|
||||
print(f"Checking table is synchronized: {table_path}")
|
||||
result_query = f'select * from {table_path} order by {order_by};'
|
||||
result_query = f"select * from {table_path} order by {order_by};"
|
||||
|
||||
expected = instance.query(f'select * from {postgres_database}.{table_name} order by {order_by};')
|
||||
expected = instance.query(
|
||||
f"select * from {postgres_database}.{table_name} order by {order_by};"
|
||||
)
|
||||
result = instance.query(result_query)
|
||||
|
||||
for _ in range(30):
|
||||
@ -265,9 +327,16 @@ def check_tables_are_synchronized(instance, table_name, order_by='key', postgres
|
||||
time.sleep(0.5)
|
||||
result = instance.query(result_query)
|
||||
|
||||
assert(result == expected)
|
||||
assert result == expected
|
||||
|
||||
|
||||
def check_several_tables_are_synchronized(instance, tables_num, order_by='key', postgres_database='postgres_database', materialized_database='test_database', schema_name=''):
|
||||
def check_several_tables_are_synchronized(
|
||||
instance,
|
||||
tables_num,
|
||||
order_by="key",
|
||||
postgres_database="postgres_database",
|
||||
materialized_database="test_database",
|
||||
schema_name="",
|
||||
):
|
||||
for i in range(tables_num):
|
||||
check_tables_are_synchronized(instance, f'postgresql_replica_{i}');
|
||||
check_tables_are_synchronized(instance, f"postgresql_replica_{i}")
|
||||
|
@ -5,17 +5,17 @@ import os.path
|
||||
# Without this function all workers will log to the same log file
|
||||
# and mix everything together making it much more difficult for troubleshooting.
|
||||
def setup():
|
||||
worker_name = os.environ.get('PYTEST_XDIST_WORKER', 'master')
|
||||
if worker_name == 'master':
|
||||
worker_name = os.environ.get("PYTEST_XDIST_WORKER", "master")
|
||||
if worker_name == "master":
|
||||
return
|
||||
logger = logging.getLogger('')
|
||||
logger = logging.getLogger("")
|
||||
new_handlers = []
|
||||
handlers_to_remove = []
|
||||
for handler in logger.handlers:
|
||||
if isinstance(handler, logging.FileHandler):
|
||||
filename, ext = os.path.splitext(handler.baseFilename)
|
||||
if not filename.endswith('-' + worker_name):
|
||||
new_filename = filename + '-' + worker_name
|
||||
if not filename.endswith("-" + worker_name):
|
||||
new_filename = filename + "-" + worker_name
|
||||
new_handler = logging.FileHandler(new_filename + ext)
|
||||
new_handler.setFormatter(handler.formatter)
|
||||
new_handler.setLevel(handler.level)
|
||||
|
@ -13,12 +13,18 @@ class TSV:
        elif isinstance(contents, str) or isinstance(contents, str):
            raw_lines = contents.splitlines(True)
        elif isinstance(contents, list):
            raw_lines = ['\t'.join(map(str, l)) if isinstance(l, list) else str(l) for l in contents]
            raw_lines = [
                "\t".join(map(str, l)) if isinstance(l, list) else str(l)
                for l in contents
            ]
        elif isinstance(contents, TSV):
            self.lines = contents.lines
            return
        else:
            raise TypeError("contents must be either file or string or list, actual type: " + type(contents).__name__)
            raise TypeError(
                "contents must be either file or string or list, actual type: "
                + type(contents).__name__
            )
        self.lines = [l.strip() for l in raw_lines if l.strip()]

    def __eq__(self, other):
@ -31,13 +37,18 @@ class TSV:
            return self != TSV(other)
        return self.lines != other.lines

    def diff(self, other, n1='', n2=''):
    def diff(self, other, n1="", n2=""):
        if not isinstance(other, TSV):
            return self.diff(TSV(other), n1=n1, n2=n2)
        return list(line.rstrip() for line in difflib.unified_diff(self.lines, other.lines, fromfile=n1, tofile=n2))[2:]
        return list(
            line.rstrip()
            for line in difflib.unified_diff(
                self.lines, other.lines, fromfile=n1, tofile=n2
            )
        )[2:]

    def __str__(self):
        return '\n'.join(self.lines)
        return "\n".join(self.lines)

    def __repr__(self):
        return self.__str__()
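For reference, how the TSV wrapper behaves after this change (the reformatting does not alter semantics); a small sketch:

assert TSV("1\t2\n3\t4\n") == TSV([[1, 2], [3, 4]])   # comparison is line-based
diff_lines = TSV("1\t2").diff(TSV("1\t3"), n1="expected", n2="actual")
# diff_lines holds unified-diff output with the two file-header lines stripped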
@ -50,29 +61,70 @@ class TSV:
|
||||
return [line.split("\t") for line in contents.split("\n") if line.strip()]
|
||||
|
||||
|
||||
def assert_eq_with_retry(instance, query, expectation, retry_count=20, sleep_time=0.5, stdin=None, timeout=None,
|
||||
settings=None, user=None, ignore_error=False, get_result=lambda x: x):
|
||||
def assert_eq_with_retry(
|
||||
instance,
|
||||
query,
|
||||
expectation,
|
||||
retry_count=20,
|
||||
sleep_time=0.5,
|
||||
stdin=None,
|
||||
timeout=None,
|
||||
settings=None,
|
||||
user=None,
|
||||
ignore_error=False,
|
||||
get_result=lambda x: x,
|
||||
):
|
||||
expectation_tsv = TSV(expectation)
|
||||
for i in range(retry_count):
|
||||
try:
|
||||
if TSV(get_result(instance.query(query, user=user, stdin=stdin, timeout=timeout, settings=settings,
|
||||
ignore_error=ignore_error))) == expectation_tsv:
|
||||
if (
|
||||
TSV(
|
||||
get_result(
|
||||
instance.query(
|
||||
query,
|
||||
user=user,
|
||||
stdin=stdin,
|
||||
timeout=timeout,
|
||||
settings=settings,
|
||||
ignore_error=ignore_error,
|
||||
)
|
||||
)
|
||||
)
|
||||
== expectation_tsv
|
||||
):
|
||||
break
|
||||
time.sleep(sleep_time)
|
||||
except Exception as ex:
|
||||
logging.exception(f"assert_eq_with_retry retry {i+1} exception {ex}")
|
||||
time.sleep(sleep_time)
|
||||
else:
|
||||
val = TSV(get_result(instance.query(query, user=user, stdin=stdin, timeout=timeout, settings=settings,
|
||||
ignore_error=ignore_error)))
|
||||
val = TSV(
|
||||
get_result(
|
||||
instance.query(
|
||||
query,
|
||||
user=user,
|
||||
stdin=stdin,
|
||||
timeout=timeout,
|
||||
settings=settings,
|
||||
ignore_error=ignore_error,
|
||||
)
|
||||
)
|
||||
)
|
||||
if expectation_tsv != val:
|
||||
raise AssertionError("'{}' != '{}'\n{}".format(expectation_tsv, val, '\n'.join(
|
||||
expectation_tsv.diff(val, n1="expectation", n2="query"))))
|
||||
raise AssertionError(
|
||||
"'{}' != '{}'\n{}".format(
|
||||
expectation_tsv,
|
||||
val,
|
||||
"\n".join(expectation_tsv.diff(val, n1="expectation", n2="query")),
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def assert_logs_contain(instance, substring):
|
||||
if not instance.contains_in_log(substring):
|
||||
raise AssertionError("'{}' not found in logs".format(substring))
|
||||
|
||||
|
||||
def assert_logs_contain_with_retry(instance, substring, retry_count=20, sleep_time=0.5):
|
||||
for i in range(retry_count):
|
||||
try:
|
||||
@ -85,7 +137,10 @@ def assert_logs_contain_with_retry(instance, substring, retry_count=20, sleep_ti
|
||||
else:
|
||||
raise AssertionError("'{}' not found in logs".format(substring))
|
||||
|
||||
def exec_query_with_retry(instance, query, retry_count=40, sleep_time=0.5, silent=False, settings={}):
|
||||
|
||||
def exec_query_with_retry(
|
||||
instance, query, retry_count=40, sleep_time=0.5, silent=False, settings={}
|
||||
):
|
||||
exception = None
|
||||
for cnt in range(retry_count):
|
||||
try:
|
||||
@ -96,16 +151,21 @@ def exec_query_with_retry(instance, query, retry_count=40, sleep_time=0.5, silen
|
||||
except Exception as ex:
|
||||
exception = ex
|
||||
if not silent:
|
||||
logging.exception(f"Failed to execute query '{query}' on {cnt} try on instance '{instance.name}' will retry")
|
||||
logging.exception(
|
||||
f"Failed to execute query '{query}' on {cnt} try on instance '{instance.name}' will retry"
|
||||
)
|
||||
time.sleep(sleep_time)
|
||||
else:
|
||||
raise exception
|
||||
|
||||
|
||||
def csv_compare(result, expected):
|
||||
csv_result = TSV(result)
|
||||
csv_expected = TSV(expected)
|
||||
mismatch = []
|
||||
max_len = len(csv_result) if len(csv_result) > len(csv_expected) else len(csv_expected)
|
||||
max_len = (
|
||||
len(csv_result) if len(csv_result) > len(csv_expected) else len(csv_expected)
|
||||
)
|
||||
for i in range(max_len):
|
||||
if i >= len(csv_result):
|
||||
mismatch.append("-[%d]=%s" % (i, csv_expected.lines[i]))
|
||||
|
@ -8,30 +8,30 @@ sys.path.insert(0, os.path.join(CURDIR))

from . import uexpect

prompt = ':\) '
end_of_block = r'.*\r\n.*\r\n'
prompt = ":\) "
end_of_block = r".*\r\n.*\r\n"


class client(object):
    def __init__(self, command=None, name='', log=None):
        self.client = uexpect.spawn(['/bin/bash', '--noediting'])
    def __init__(self, command=None, name="", log=None):
        self.client = uexpect.spawn(["/bin/bash", "--noediting"])
        if command is None:
            command = '/usr/bin/clickhouse-client'
            command = "/usr/bin/clickhouse-client"
        self.client.command = command
        self.client.eol('\r')
        self.client.eol("\r")
        self.client.logger(log, prefix=name)
        self.client.timeout(20)
        self.client.expect('[#\$] ', timeout=2)
        self.client.expect("[#\$] ", timeout=2)
        self.client.send(command)

    def __enter__(self):
        return self.client.__enter__()

    def __exit__(self, type, value, traceback):
        self.client.reader['kill_event'].set()
        self.client.reader["kill_event"].set()
        # send Ctrl-C
        self.client.send('\x03', eol='')
        self.client.send("\x03", eol="")
        time.sleep(0.3)
        self.client.send('quit', eol='\r')
        self.client.send('\x03', eol='')
        self.client.send("quit", eol="\r")
        self.client.send("\x03", eol="")
        return self.client.__exit__(type, value, traceback)
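A usage sketch for the wrapper above, as it is typically driven from a test; the log target and the exact prompt handling are assumptions:

with client(name="client1>", log=sys.stdout) as c:
    c.expect(prompt)
    c.send("SELECT 1")
    c.expect("1")
    c.expect(prompt)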
@ -25,7 +25,7 @@ class TimeoutError(Exception):
|
||||
self.timeout = timeout
|
||||
|
||||
def __str__(self):
|
||||
return 'Timeout %.3fs' % float(self.timeout)
|
||||
return "Timeout %.3fs" % float(self.timeout)
|
||||
|
||||
|
||||
class ExpectTimeoutError(Exception):
|
||||
@ -35,12 +35,12 @@ class ExpectTimeoutError(Exception):
|
||||
self.buffer = buffer
|
||||
|
||||
def __str__(self):
|
||||
s = 'Timeout %.3fs ' % float(self.timeout)
|
||||
s = "Timeout %.3fs " % float(self.timeout)
|
||||
if self.pattern:
|
||||
s += 'for %s ' % repr(self.pattern.pattern)
|
||||
s += "for %s " % repr(self.pattern.pattern)
|
||||
if self.buffer:
|
||||
s += 'buffer %s ' % repr(self.buffer[:])
|
||||
s += 'or \'%s\'' % ','.join(['%x' % ord(c) for c in self.buffer[:]])
|
||||
s += "buffer %s " % repr(self.buffer[:])
|
||||
s += "or '%s'" % ",".join(["%x" % ord(c) for c in self.buffer[:]])
|
||||
return s
|
||||
|
||||
|
||||
@ -55,12 +55,12 @@ class IO(object):
|
||||
TIMEOUT = Timeout
|
||||
|
||||
class Logger(object):
|
||||
def __init__(self, logger, prefix=''):
|
||||
def __init__(self, logger, prefix=""):
|
||||
self._logger = logger
|
||||
self._prefix = prefix
|
||||
|
||||
def write(self, data):
|
||||
self._logger.write(('\n' + data).replace('\n', '\n' + self._prefix))
|
||||
self._logger.write(("\n" + data).replace("\n", "\n" + self._prefix))
|
||||
|
||||
def flush(self):
|
||||
self._logger.flush()
|
||||
@ -77,7 +77,7 @@ class IO(object):
|
||||
self.reader = reader
|
||||
self._timeout = None
|
||||
self._logger = None
|
||||
self._eol = ''
|
||||
self._eol = ""
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
@ -85,7 +85,7 @@ class IO(object):
|
||||
def __exit__(self, type, value, traceback):
|
||||
self.close()
|
||||
|
||||
def logger(self, logger=None, prefix=''):
|
||||
def logger(self, logger=None, prefix=""):
|
||||
if logger:
|
||||
self._logger = self.Logger(logger, prefix=prefix)
|
||||
return self._logger
|
||||
@ -101,15 +101,15 @@ class IO(object):
|
||||
return self._eol
|
||||
|
||||
def close(self, force=True):
|
||||
self.reader['kill_event'].set()
|
||||
os.system('pkill -TERM -P %d' % self.process.pid)
|
||||
self.reader["kill_event"].set()
|
||||
os.system("pkill -TERM -P %d" % self.process.pid)
|
||||
if force:
|
||||
self.process.kill()
|
||||
else:
|
||||
self.process.terminate()
|
||||
os.close(self.master)
|
||||
if self._logger:
|
||||
self._logger.write('\n')
|
||||
self._logger.write("\n")
|
||||
self._logger.flush()
|
||||
|
||||
def send(self, data, eol=None):
|
||||
@ -135,9 +135,9 @@ class IO(object):
|
||||
if self.buffer is not None:
|
||||
self.match = pattern.search(self.buffer, 0)
|
||||
if self.match is not None:
|
||||
self.after = self.buffer[self.match.start():self.match.end()]
|
||||
self.before = self.buffer[:self.match.start()]
|
||||
self.buffer = self.buffer[self.match.end():]
|
||||
self.after = self.buffer[self.match.start() : self.match.end()]
|
||||
self.before = self.buffer[: self.match.start()]
|
||||
self.buffer = self.buffer[self.match.end() :]
|
||||
break
|
||||
if timeleft < 0:
|
||||
break
|
||||
@ -145,16 +145,16 @@ class IO(object):
|
||||
data = self.read(timeout=timeleft, raise_exception=True)
|
||||
except TimeoutError:
|
||||
if self._logger:
|
||||
self._logger.write((self.buffer or '') + '\n')
|
||||
self._logger.write((self.buffer or "") + "\n")
|
||||
self._logger.flush()
|
||||
exception = ExpectTimeoutError(pattern, timeout, self.buffer)
|
||||
self.buffer = None
|
||||
raise exception
|
||||
timeleft -= (time.time() - start_time)
|
||||
timeleft -= time.time() - start_time
|
||||
if data:
|
||||
self.buffer = (self.buffer + data) if self.buffer else data
|
||||
if self._logger:
|
||||
self._logger.write((self.before or '') + (self.after or ''))
|
||||
self._logger.write((self.before or "") + (self.after or ""))
|
||||
self._logger.flush()
|
||||
if self.match is None:
|
||||
exception = ExpectTimeoutError(pattern, timeout, self.buffer)
|
||||
@ -163,7 +163,7 @@ class IO(object):
|
||||
return self.match
|
||||
|
||||
def read(self, timeout=0, raise_exception=False):
|
||||
data = ''
|
||||
data = ""
|
||||
timeleft = timeout
|
||||
try:
|
||||
while timeleft >= 0:
|
||||
@ -171,7 +171,7 @@ class IO(object):
|
||||
data += self.queue.get(timeout=timeleft)
|
||||
if data:
|
||||
break
|
||||
timeleft -= (time.time() - start_time)
|
||||
timeleft -= time.time() - start_time
|
||||
except Empty:
|
||||
if data:
|
||||
return data
|
||||
@ -186,7 +186,14 @@ class IO(object):
|
||||
|
||||
def spawn(command):
|
||||
master, slave = pty.openpty()
|
||||
process = Popen(command, preexec_fn=os.setsid, stdout=slave, stdin=slave, stderr=slave, bufsize=1)
|
||||
process = Popen(
|
||||
command,
|
||||
preexec_fn=os.setsid,
|
||||
stdout=slave,
|
||||
stdin=slave,
|
||||
stderr=slave,
|
||||
bufsize=1,
|
||||
)
|
||||
os.close(slave)
|
||||
|
||||
queue = Queue()
|
||||
@ -195,14 +202,19 @@ def spawn(command):
|
||||
thread.daemon = True
|
||||
thread.start()
|
||||
|
||||
return IO(process, master, queue, reader={'thread': thread, 'kill_event': reader_kill_event})
|
||||
return IO(
|
||||
process,
|
||||
master,
|
||||
queue,
|
||||
reader={"thread": thread, "kill_event": reader_kill_event},
|
||||
)
|
||||
|
||||
|
||||
def reader(process, out, queue, kill_event):
|
||||
while True:
|
||||
try:
|
||||
# TODO: there are some issues with 1<<16 buffer size
|
||||
data = os.read(out, 1<<17).decode(errors='replace')
|
||||
data = os.read(out, 1 << 17).decode(errors="replace")
|
||||
queue.put(data)
|
||||
except:
|
||||
if kill_event.is_set():
|
||||
|
@ -11,11 +11,13 @@ class SafeThread(threading.Thread):
        super().__init__()
        self.target = target
        self.exception = None

    def run(self):
        try:
            self.target()
        except Exception as e: # pylint: disable=broad-except
        except Exception as e:  # pylint: disable=broad-except
            self.exception = e

    def join(self, timeout=None):
        super().join(timeout)
        if self.exception:
@ -24,7 +26,7 @@ class SafeThread(threading.Thread):

def random_string(length):
    letters = string.ascii_letters
    return ''.join(random.choice(letters) for i in range(length))
    return "".join(random.choice(letters) for i in range(length))


def generate_values(date_str, count, sign=1):
@ -34,10 +36,10 @@ def generate_values(date_str, count, sign=1):


def replace_config(config_path, old, new):
    config = open(config_path, 'r')
    config = open(config_path, "r")
    config_lines = config.readlines()
    config.close()
    config_lines = [line.replace(old, new) for line in config_lines]
    config = open(config_path, 'w')
    config = open(config_path, "w")
    config.writelines(config_lines)
    config.close()
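A sketch of replace_config in use; the path and the tags are illustrative only:

replace_config(
    "/etc/clickhouse-server/config.d/storage.xml",
    "<max_size>100</max_size>",
    "<max_size>1000</max_size>",
)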
@ -19,14 +19,19 @@ from helpers.cluster import ClickHouseCluster
|
||||
|
||||
cluster = ClickHouseCluster(__file__)
|
||||
|
||||
node = cluster.add_instance('node', main_configs=[
|
||||
'configs/no_system_log.xml',
|
||||
'configs/asynchronous_metrics_update_period_s.xml',
|
||||
], user_configs=[
|
||||
'configs/users.d/overrides.xml',
|
||||
])
|
||||
node = cluster.add_instance(
|
||||
"node",
|
||||
main_configs=[
|
||||
"configs/no_system_log.xml",
|
||||
"configs/asynchronous_metrics_update_period_s.xml",
|
||||
],
|
||||
user_configs=[
|
||||
"configs/users.d/overrides.xml",
|
||||
],
|
||||
)
|
||||
|
||||
@pytest.fixture(scope='module', autouse=True)
|
||||
|
||||
@pytest.fixture(scope="module", autouse=True)
|
||||
def start_cluster():
|
||||
try:
|
||||
cluster.start()
|
||||
@ -34,31 +39,39 @@ def start_cluster():
|
||||
finally:
|
||||
cluster.shutdown()
|
||||
|
||||
|
||||
query_settings = {
|
||||
'max_threads': 1,
|
||||
'log_queries': 0,
|
||||
"max_threads": 1,
|
||||
"log_queries": 0,
|
||||
}
|
||||
sample_query = "SELECT groupArray(repeat('a', 1000)) FROM numbers(10000) GROUP BY number%10 FORMAT JSON"
|
||||
|
||||
|
||||
def query(*args, **kwargs):
|
||||
if 'settings' not in kwargs:
|
||||
kwargs['settings'] = query_settings
|
||||
if "settings" not in kwargs:
|
||||
kwargs["settings"] = query_settings
|
||||
else:
|
||||
kwargs['settings'].update(query_settings)
|
||||
kwargs["settings"].update(query_settings)
|
||||
return node.query(*args, **kwargs)
|
||||
|
||||
|
||||
def http_query(*args, **kwargs):
|
||||
if 'params' not in kwargs:
|
||||
kwargs['params'] = query_settings
|
||||
if "params" not in kwargs:
|
||||
kwargs["params"] = query_settings
|
||||
else:
|
||||
kwargs['params'].update(query_settings)
|
||||
kwargs["params"].update(query_settings)
|
||||
return node.http_query(*args, **kwargs)
|
||||
|
||||
|
||||
def get_MemoryTracking():
|
||||
return int(http_query("SELECT value FROM system.metrics WHERE metric = 'MemoryTracking'"))
|
||||
return int(
|
||||
http_query("SELECT value FROM system.metrics WHERE metric = 'MemoryTracking'")
|
||||
)
|
||||
|
||||
|
||||
def check_memory(memory):
|
||||
# bytes -> megabytes
|
||||
memory = [*map(lambda x: int(int(x)/1024/1024), memory)]
|
||||
memory = [*map(lambda x: int(int(x) / 1024 / 1024), memory)]
|
||||
# 3 changes to MemoryTracking is minimum, since:
|
||||
# - this is not that high to not detect inacuracy
|
||||
# - memory can go like X/X+N due to some background allocations
|
||||
@ -66,14 +79,19 @@ def check_memory(memory):
|
||||
changes_allowed = 3
|
||||
# if number of samples is large enough, use 10% from them
|
||||
# (actually most of the time there will be only few changes, it was made 10% to avoid flackiness)
|
||||
changes_allowed_auto=int(len(memory) * 0.1)
|
||||
changes_allowed_auto = int(len(memory) * 0.1)
|
||||
changes_allowed = max(changes_allowed_auto, changes_allowed)
|
||||
|
||||
changed=len(set(memory))
|
||||
logging.info('Changes: allowed=%s, actual=%s, sample=%s',
|
||||
changes_allowed, changed, len(memory))
|
||||
changed = len(set(memory))
|
||||
logging.info(
|
||||
"Changes: allowed=%s, actual=%s, sample=%s",
|
||||
changes_allowed,
|
||||
changed,
|
||||
len(memory),
|
||||
)
|
||||
assert changed < changes_allowed
|
||||
|
||||
|
||||
def test_http():
|
||||
memory = []
|
||||
memory.append(get_MemoryTracking())
|
||||
@ -82,6 +100,7 @@ def test_http():
|
||||
memory.append(get_MemoryTracking())
|
||||
check_memory(memory)
|
||||
|
||||
|
||||
def test_tcp_multiple_sessions():
|
||||
memory = []
|
||||
memory.append(get_MemoryTracking())
|
||||
@ -90,6 +109,7 @@ def test_tcp_multiple_sessions():
|
||||
memory.append(get_MemoryTracking())
|
||||
check_memory(memory)
|
||||
|
||||
|
||||
def test_tcp_single_session():
|
||||
memory = []
|
||||
memory.append(get_MemoryTracking())
|
||||
@ -97,9 +117,9 @@ def test_tcp_single_session():
|
||||
sample_query,
|
||||
"SELECT metric, value FROM system.metrics WHERE metric = 'MemoryTracking'",
|
||||
] * 100
|
||||
rows = query(';'.join(sample_queries))
|
||||
memory = rows.split('\n')
|
||||
memory = filter(lambda x: x.startswith('MemoryTracking'), memory)
|
||||
memory = map(lambda x: x.split('\t')[1], memory)
|
||||
rows = query(";".join(sample_queries))
|
||||
memory = rows.split("\n")
|
||||
memory = filter(lambda x: x.startswith("MemoryTracking"), memory)
|
||||
memory = map(lambda x: x.split("\t")[1], memory)
|
||||
memory = [*memory]
|
||||
check_memory(memory)
|
||||
|
@ -2,9 +2,15 @@ import pytest
from helpers.cluster import ClickHouseCluster

cluster = ClickHouseCluster(__file__)
ch1 = cluster.add_instance('ch1', main_configs=["configs/config.d/clusters.xml"], with_zookeeper=True)
ch2 = cluster.add_instance('ch2', main_configs=["configs/config.d/clusters.xml"], with_zookeeper=True)
ch3 = cluster.add_instance('ch3', main_configs=["configs/config.d/clusters.xml"], with_zookeeper=True)
ch1 = cluster.add_instance(
    "ch1", main_configs=["configs/config.d/clusters.xml"], with_zookeeper=True
)
ch2 = cluster.add_instance(
    "ch2", main_configs=["configs/config.d/clusters.xml"], with_zookeeper=True
)
ch3 = cluster.add_instance(
    "ch3", main_configs=["configs/config.d/clusters.xml"], with_zookeeper=True
)


@pytest.fixture(scope="module", autouse=True)
@ -18,17 +24,23 @@ def started_cluster():


def test_access_control_on_cluster():
    ch1.query_with_retry("CREATE USER IF NOT EXISTS Alex ON CLUSTER 'cluster'", retry_count=5)
    ch1.query_with_retry(
        "CREATE USER IF NOT EXISTS Alex ON CLUSTER 'cluster'", retry_count=5
    )
    assert ch1.query("SHOW CREATE USER Alex") == "CREATE USER Alex\n"
    assert ch2.query("SHOW CREATE USER Alex") == "CREATE USER Alex\n"
    assert ch3.query("SHOW CREATE USER Alex") == "CREATE USER Alex\n"

    ch2.query_with_retry("GRANT ON CLUSTER 'cluster' SELECT ON *.* TO Alex", retry_count=3)
    ch2.query_with_retry(
        "GRANT ON CLUSTER 'cluster' SELECT ON *.* TO Alex", retry_count=3
    )
    assert ch1.query("SHOW GRANTS FOR Alex") == "GRANT SELECT ON *.* TO Alex\n"
    assert ch2.query("SHOW GRANTS FOR Alex") == "GRANT SELECT ON *.* TO Alex\n"
    assert ch3.query("SHOW GRANTS FOR Alex") == "GRANT SELECT ON *.* TO Alex\n"

    ch3.query_with_retry("REVOKE ON CLUSTER 'cluster' SELECT ON *.* FROM Alex", retry_count=3)
    ch3.query_with_retry(
        "REVOKE ON CLUSTER 'cluster' SELECT ON *.* FROM Alex", retry_count=3
    )
    assert ch1.query("SHOW GRANTS FOR Alex") == ""
    assert ch2.query("SHOW GRANTS FOR Alex") == ""
    assert ch3.query("SHOW GRANTS FOR Alex") == ""
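The retried DDL pattern above generalizes; a hedged example of the same helper used for cleanup, assuming query_with_retry behaves like query plus retries:

ch1.query_with_retry("DROP USER IF EXISTS Alex ON CLUSTER 'cluster'", retry_count=5)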
@ -3,7 +3,7 @@ import uuid
|
||||
from helpers.cluster import ClickHouseCluster
|
||||
|
||||
cluster = ClickHouseCluster(__file__)
|
||||
instance = cluster.add_instance('instance', stay_alive=True)
|
||||
instance = cluster.add_instance("instance", stay_alive=True)
|
||||
|
||||
|
||||
@pytest.fixture(scope="module", autouse=True)
|
||||
@ -21,21 +21,32 @@ def test_access_rights_for_function():
|
||||
|
||||
instance.query("CREATE USER A")
|
||||
instance.query("CREATE USER B")
|
||||
assert "it's necessary to have grant CREATE FUNCTION ON *.*" in instance.query_and_get_error(create_function_query, user = 'A')
|
||||
assert (
|
||||
"it's necessary to have grant CREATE FUNCTION ON *.*"
|
||||
in instance.query_and_get_error(create_function_query, user="A")
|
||||
)
|
||||
|
||||
instance.query("GRANT CREATE FUNCTION on *.* TO A")
|
||||
|
||||
instance.query(create_function_query, user = 'A')
|
||||
instance.query(create_function_query, user="A")
|
||||
assert instance.query("SELECT MySum(1, 2)") == "3\n"
|
||||
|
||||
assert "it's necessary to have grant DROP FUNCTION ON *.*" in instance.query_and_get_error("DROP FUNCTION MySum", user = 'B')
|
||||
assert (
|
||||
"it's necessary to have grant DROP FUNCTION ON *.*"
|
||||
in instance.query_and_get_error("DROP FUNCTION MySum", user="B")
|
||||
)
|
||||
|
||||
instance.query("GRANT DROP FUNCTION ON *.* TO B")
|
||||
instance.query("DROP FUNCTION MySum", user = 'B')
|
||||
assert "Unknown function MySum" in instance.query_and_get_error("SELECT MySum(1, 2)")
|
||||
instance.query("DROP FUNCTION MySum", user="B")
|
||||
assert "Unknown function MySum" in instance.query_and_get_error(
|
||||
"SELECT MySum(1, 2)"
|
||||
)
|
||||
|
||||
instance.query("REVOKE CREATE FUNCTION ON *.* FROM A")
|
||||
assert "it's necessary to have grant CREATE FUNCTION ON *.*" in instance.query_and_get_error(create_function_query, user = 'A')
|
||||
assert (
|
||||
"it's necessary to have grant CREATE FUNCTION ON *.*"
|
||||
in instance.query_and_get_error(create_function_query, user="A")
|
||||
)
|
||||
|
||||
instance.query("DROP USER IF EXISTS A")
|
||||
instance.query("DROP USER IF EXISTS B")
|
||||
@ -45,13 +56,21 @@ def test_ignore_obsolete_grant_on_database():
|
||||
instance.stop_clickhouse()
|
||||
|
||||
user_id = uuid.uuid4()
|
||||
instance.exec_in_container(["bash", "-c" , f"""
|
||||
instance.exec_in_container(
|
||||
[
|
||||
"bash",
|
||||
"-c",
|
||||
f"""
|
||||
cat > /var/lib/clickhouse/access/{user_id}.sql << EOF
|
||||
ATTACH USER X;
|
||||
ATTACH GRANT CREATE FUNCTION, SELECT ON mydb.* TO X;
|
||||
EOF"""])
|
||||
EOF""",
|
||||
]
|
||||
)
|
||||
|
||||
instance.exec_in_container(["bash", "-c" , "touch /var/lib/clickhouse/access/need_rebuild_lists.mark"])
|
||||
instance.exec_in_container(
|
||||
["bash", "-c", "touch /var/lib/clickhouse/access/need_rebuild_lists.mark"]
|
||||
)
|
||||
instance.start_clickhouse()
|
||||
|
||||
assert instance.query("SHOW GRANTS FOR X") == "GRANT SELECT ON mydb.* TO X\n"
|
||||
|
@@ -3,8 +3,8 @@ import pytest
from helpers.cluster import ClickHouseCluster

cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1')
node2 = cluster.add_instance('node2')
node1 = cluster.add_instance("node1")
node2 = cluster.add_instance("node2")


@pytest.fixture(scope="module")
@@ -14,10 +14,15 @@ def start_cluster():

for node in [node1, node2]:
node.query(
"create table da_memory_efficient_shard(A Int64, B Int64) Engine=MergeTree order by A partition by B % 2;")
"create table da_memory_efficient_shard(A Int64, B Int64) Engine=MergeTree order by A partition by B % 2;"
)

node1.query("insert into da_memory_efficient_shard select number, number from numbers(100000);")
node2.query("insert into da_memory_efficient_shard select number + 100000, number from numbers(100000);")
node1.query(
"insert into da_memory_efficient_shard select number, number from numbers(100000);"
)
node2.query(
"insert into da_memory_efficient_shard select number + 100000, number from numbers(100000);"
)

yield cluster

@@ -27,23 +32,29 @@ def start_cluster():

def test_remote(start_cluster):
node1.query(
"set distributed_aggregation_memory_efficient = 1, group_by_two_level_threshold = 1, group_by_two_level_threshold_bytes=1")
"set distributed_aggregation_memory_efficient = 1, group_by_two_level_threshold = 1, group_by_two_level_threshold_bytes=1"
)
res = node1.query(
"select sum(a) from (SELECT B, uniqExact(A) a FROM remote('node{1,2}', default.da_memory_efficient_shard) GROUP BY B)")
assert res == '200000\n'
"select sum(a) from (SELECT B, uniqExact(A) a FROM remote('node{1,2}', default.da_memory_efficient_shard) GROUP BY B)"
)
assert res == "200000\n"

node1.query("set distributed_aggregation_memory_efficient = 0")
res = node1.query(
"select sum(a) from (SELECT B, uniqExact(A) a FROM remote('node{1,2}', default.da_memory_efficient_shard) GROUP BY B)")
assert res == '200000\n'
"select sum(a) from (SELECT B, uniqExact(A) a FROM remote('node{1,2}', default.da_memory_efficient_shard) GROUP BY B)"
)
assert res == "200000\n"

node1.query(
"set distributed_aggregation_memory_efficient = 1, group_by_two_level_threshold = 1, group_by_two_level_threshold_bytes=1")
"set distributed_aggregation_memory_efficient = 1, group_by_two_level_threshold = 1, group_by_two_level_threshold_bytes=1"
)
res = node1.query(
"SELECT fullHostName() AS h, uniqExact(A) AS a FROM remote('node{1,2}', default.da_memory_efficient_shard) GROUP BY h ORDER BY h;")
assert res == 'node1\t100000\nnode2\t100000\n'
"SELECT fullHostName() AS h, uniqExact(A) AS a FROM remote('node{1,2}', default.da_memory_efficient_shard) GROUP BY h ORDER BY h;"
)
assert res == "node1\t100000\nnode2\t100000\n"

node1.query("set distributed_aggregation_memory_efficient = 0")
res = node1.query(
"SELECT fullHostName() AS h, uniqExact(A) AS a FROM remote('node{1,2}', default.da_memory_efficient_shard) GROUP BY h ORDER BY h;")
assert res == 'node1\t100000\nnode2\t100000\n'
"SELECT fullHostName() AS h, uniqExact(A) AS a FROM remote('node{1,2}', default.da_memory_efficient_shard) GROUP BY h ORDER BY h;"
)
assert res == "node1\t100000\nnode2\t100000\n"

@@ -2,31 +2,42 @@ import pytest
from helpers.cluster import ClickHouseCluster

cluster = ClickHouseCluster(__file__)
server = cluster.add_instance('server', user_configs=["configs/users.d/network.xml"])
server = cluster.add_instance("server", user_configs=["configs/users.d/network.xml"])

clientA1 = cluster.add_instance('clientA1', hostname='clientA1.com')
clientA2 = cluster.add_instance('clientA2', hostname='clientA2.com')
clientA3 = cluster.add_instance('clientA3', hostname='clientA3.com')
clientB1 = cluster.add_instance('clientB1', hostname='clientB001.ru')
clientB2 = cluster.add_instance('clientB2', hostname='clientB002.ru')
clientB3 = cluster.add_instance('clientB3', hostname='xxx.clientB003.rutracker.com')
clientC1 = cluster.add_instance('clientC1', hostname='clientC01.ru')
clientC2 = cluster.add_instance('clientC2', hostname='xxx.clientC02.ru')
clientC3 = cluster.add_instance('clientC3', hostname='xxx.clientC03.rutracker.com')
clientD1 = cluster.add_instance('clientD1', hostname='clientD0001.ru')
clientD2 = cluster.add_instance('clientD2', hostname='xxx.clientD0002.ru')
clientD3 = cluster.add_instance('clientD3', hostname='clientD0003.ru')
clientA1 = cluster.add_instance("clientA1", hostname="clientA1.com")
clientA2 = cluster.add_instance("clientA2", hostname="clientA2.com")
clientA3 = cluster.add_instance("clientA3", hostname="clientA3.com")
clientB1 = cluster.add_instance("clientB1", hostname="clientB001.ru")
clientB2 = cluster.add_instance("clientB2", hostname="clientB002.ru")
clientB3 = cluster.add_instance("clientB3", hostname="xxx.clientB003.rutracker.com")
clientC1 = cluster.add_instance("clientC1", hostname="clientC01.ru")
clientC2 = cluster.add_instance("clientC2", hostname="xxx.clientC02.ru")
clientC3 = cluster.add_instance("clientC3", hostname="xxx.clientC03.rutracker.com")
clientD1 = cluster.add_instance("clientD1", hostname="clientD0001.ru")
clientD2 = cluster.add_instance("clientD2", hostname="xxx.clientD0002.ru")
clientD3 = cluster.add_instance("clientD3", hostname="clientD0003.ru")


def check_clickhouse_is_ok(client_node, server_node):
assert client_node.exec_in_container(
["bash", "-c", "/usr/bin/curl -s {}:8123 ".format(server_node.hostname)]) == "Ok.\n"
assert (
client_node.exec_in_container(
["bash", "-c", "/usr/bin/curl -s {}:8123 ".format(server_node.hostname)]
)
== "Ok.\n"
)


def query_from_one_node_to_another(client_node, server_node, query):
check_clickhouse_is_ok(client_node, server_node)
return client_node.exec_in_container(
["bash", "-c", "/usr/bin/clickhouse client --host {} --query {!r}".format(server_node.hostname, query)])
[
"bash",
"-c",
"/usr/bin/clickhouse client --host {} --query {!r}".format(
server_node.hostname, query
),
]
)


def query(node, query):
@@ -38,7 +49,10 @@ def setup_nodes():
try:
cluster.start()
query(server, "DROP TABLE IF EXISTS test_allowed_client_hosts")
query(server, "CREATE TABLE test_allowed_client_hosts (x Int32) ENGINE = MergeTree() ORDER BY tuple()")
query(
server,
"CREATE TABLE test_allowed_client_hosts (x Int32) ENGINE = MergeTree() ORDER BY tuple()",
)
query(server, "INSERT INTO test_allowed_client_hosts VALUES (5)")

yield cluster
@@ -58,8 +72,15 @@ def test_allowed_host():
# expected_to_fail.extend([clientC3, clientD2])

for client_node in expected_to_pass:
assert query_from_one_node_to_another(client_node, server, "SELECT * FROM test_allowed_client_hosts") == "5\n"
assert (
query_from_one_node_to_another(
client_node, server, "SELECT * FROM test_allowed_client_hosts"
)
== "5\n"
)

for client_node in expected_to_fail:
with pytest.raises(Exception, match=r'default: Authentication failed'):
query_from_one_node_to_another(client_node, server, "SELECT * FROM test_allowed_client_hosts")
with pytest.raises(Exception, match=r"default: Authentication failed"):
query_from_one_node_to_another(
client_node, server, "SELECT * FROM test_allowed_client_hosts"
)

@@ -2,13 +2,23 @@ import pytest
from helpers.cluster import ClickHouseCluster

cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', main_configs=['configs/config_with_hosts.xml'])
node2 = cluster.add_instance('node2', main_configs=['configs/config_with_only_primary_hosts.xml'])
node3 = cluster.add_instance('node3', main_configs=['configs/config_with_only_regexp_hosts.xml'])
node4 = cluster.add_instance('node4', main_configs=[]) # No `remote_url_allow_hosts` at all.
node5 = cluster.add_instance('node5', main_configs=['configs/config_without_allowed_hosts.xml'])
node6 = cluster.add_instance('node6', main_configs=['configs/config_for_remote.xml'])
node7 = cluster.add_instance('node7', main_configs=['configs/config_for_redirect.xml'], with_hdfs=True)
node1 = cluster.add_instance("node1", main_configs=["configs/config_with_hosts.xml"])
node2 = cluster.add_instance(
"node2", main_configs=["configs/config_with_only_primary_hosts.xml"]
)
node3 = cluster.add_instance(
"node3", main_configs=["configs/config_with_only_regexp_hosts.xml"]
)
node4 = cluster.add_instance(
"node4", main_configs=[]
) # No `remote_url_allow_hosts` at all.
node5 = cluster.add_instance(
"node5", main_configs=["configs/config_without_allowed_hosts.xml"]
)
node6 = cluster.add_instance("node6", main_configs=["configs/config_for_remote.xml"])
node7 = cluster.add_instance(
"node7", main_configs=["configs/config_for_redirect.xml"], with_hdfs=True
)


@pytest.fixture(scope="module")
@@ -21,97 +31,229 @@ def start_cluster():


def test_config_with_hosts(start_cluster):
assert node1.query("CREATE TABLE table_test_1_1 (word String) Engine=URL('http://host:80', HDFS)") == ""
assert node1.query("CREATE TABLE table_test_1_2 (word String) Engine=URL('https://yandex.ru', CSV)") == ""
assert (
node1.query(
"CREATE TABLE table_test_1_1 (word String) Engine=URL('http://host:80', HDFS)"
)
== ""
)
assert (
node1.query(
"CREATE TABLE table_test_1_2 (word String) Engine=URL('https://yandex.ru', CSV)"
)
== ""
)
assert "not allowed" in node1.query_and_get_error(
"CREATE TABLE table_test_1_4 (word String) Engine=URL('https://host:123', S3)")
"CREATE TABLE table_test_1_4 (word String) Engine=URL('https://host:123', S3)"
)
assert "not allowed" in node1.query_and_get_error(
"CREATE TABLE table_test_1_4 (word String) Engine=URL('https://yandex2.ru', CSV)")
"CREATE TABLE table_test_1_4 (word String) Engine=URL('https://yandex2.ru', CSV)"
)


def test_config_with_only_primary_hosts(start_cluster):
assert node2.query("CREATE TABLE table_test_2_1 (word String) Engine=URL('https://host:80', CSV)") == ""
assert node2.query("CREATE TABLE table_test_2_2 (word String) Engine=URL('https://host:123', S3)") == ""
assert node2.query("CREATE TABLE table_test_2_3 (word String) Engine=URL('https://yandex.ru', CSV)") == ""
assert node2.query("CREATE TABLE table_test_2_4 (word String) Engine=URL('https://yandex.ru:87', HDFS)") == ""
assert (
node2.query(
"CREATE TABLE table_test_2_1 (word String) Engine=URL('https://host:80', CSV)"
)
== ""
)
assert (
node2.query(
"CREATE TABLE table_test_2_2 (word String) Engine=URL('https://host:123', S3)"
)
== ""
)
assert (
node2.query(
"CREATE TABLE table_test_2_3 (word String) Engine=URL('https://yandex.ru', CSV)"
)
== ""
)
assert (
node2.query(
"CREATE TABLE table_test_2_4 (word String) Engine=URL('https://yandex.ru:87', HDFS)"
)
== ""
)
assert "not allowed" in node2.query_and_get_error(
"CREATE TABLE table_test_2_5 (word String) Engine=URL('https://host', HDFS)")
"CREATE TABLE table_test_2_5 (word String) Engine=URL('https://host', HDFS)"
)
assert "not allowed" in node2.query_and_get_error(
"CREATE TABLE table_test_2_5 (word String) Engine=URL('https://host:234', CSV)")
"CREATE TABLE table_test_2_5 (word String) Engine=URL('https://host:234', CSV)"
)
assert "not allowed" in node2.query_and_get_error(
"CREATE TABLE table_test_2_6 (word String) Engine=URL('https://yandex2.ru', S3)")
"CREATE TABLE table_test_2_6 (word String) Engine=URL('https://yandex2.ru', S3)"
)


def test_config_with_only_regexp_hosts(start_cluster):
assert node3.query("CREATE TABLE table_test_3_1 (word String) Engine=URL('https://host:80', HDFS)") == ""
assert node3.query("CREATE TABLE table_test_3_2 (word String) Engine=URL('https://yandex.ru', CSV)") == ""
assert (
node3.query(
"CREATE TABLE table_test_3_1 (word String) Engine=URL('https://host:80', HDFS)"
)
== ""
)
assert (
node3.query(
"CREATE TABLE table_test_3_2 (word String) Engine=URL('https://yandex.ru', CSV)"
)
== ""
)
assert "not allowed" in node3.query_and_get_error(
"CREATE TABLE table_test_3_3 (word String) Engine=URL('https://host', CSV)")
"CREATE TABLE table_test_3_3 (word String) Engine=URL('https://host', CSV)"
)
assert "not allowed" in node3.query_and_get_error(
"CREATE TABLE table_test_3_4 (word String) Engine=URL('https://yandex2.ru', S3)")
"CREATE TABLE table_test_3_4 (word String) Engine=URL('https://yandex2.ru', S3)"
)


def test_config_without_allowed_hosts_section(start_cluster):
assert node4.query("CREATE TABLE table_test_4_1 (word String) Engine=URL('https://host:80', CSV)") == ""
assert node4.query("CREATE TABLE table_test_4_2 (word String) Engine=S3('https://host:80/bucket/key', CSV)") == ""
assert node4.query("CREATE TABLE table_test_4_3 (word String) Engine=URL('https://host', HDFS)") == ""
assert node4.query("CREATE TABLE table_test_4_4 (word String) Engine=URL('https://yandex.ru', CSV)") == ""
assert node4.query("CREATE TABLE table_test_4_5 (word String) Engine=URL('ftp://something.com', S3)") == ""
assert (
node4.query(
"CREATE TABLE table_test_4_1 (word String) Engine=URL('https://host:80', CSV)"
)
== ""
)
assert (
node4.query(
"CREATE TABLE table_test_4_2 (word String) Engine=S3('https://host:80/bucket/key', CSV)"
)
== ""
)
assert (
node4.query(
"CREATE TABLE table_test_4_3 (word String) Engine=URL('https://host', HDFS)"
)
== ""
)
assert (
node4.query(
"CREATE TABLE table_test_4_4 (word String) Engine=URL('https://yandex.ru', CSV)"
)
== ""
)
assert (
node4.query(
"CREATE TABLE table_test_4_5 (word String) Engine=URL('ftp://something.com', S3)"
)
== ""
)


def test_config_without_allowed_hosts(start_cluster):
assert "not allowed" in node5.query_and_get_error(
"CREATE TABLE table_test_5_1 (word String) Engine=URL('https://host:80', CSV)")
"CREATE TABLE table_test_5_1 (word String) Engine=URL('https://host:80', CSV)"
)
assert "not allowed" in node5.query_and_get_error(
"CREATE TABLE table_test_5_2 (word String) Engine=S3('https://host:80/bucket/key', CSV)")
"CREATE TABLE table_test_5_2 (word String) Engine=S3('https://host:80/bucket/key', CSV)"
)
assert "not allowed" in node5.query_and_get_error(
"CREATE TABLE table_test_5_3 (word String) Engine=URL('https://host', HDFS)")
"CREATE TABLE table_test_5_3 (word String) Engine=URL('https://host', HDFS)"
)
assert "not allowed" in node5.query_and_get_error(
"CREATE TABLE table_test_5_4 (word String) Engine=URL('https://yandex.ru', CSV)")
"CREATE TABLE table_test_5_4 (word String) Engine=URL('https://yandex.ru', CSV)"
)
assert "not allowed" in node5.query_and_get_error(
"CREATE TABLE table_test_5_5 (word String) Engine=URL('ftp://something.com', S3)")
"CREATE TABLE table_test_5_5 (word String) Engine=URL('ftp://something.com', S3)"
)


def test_table_function_remote(start_cluster):
assert "not allowed in configuration file" not in node6.query_and_get_error(
"SELECT * FROM remoteSecure('example01-01-{1|2}', system, events)",
settings={"connections_with_failover_max_tries": 1, "connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout": 1})
settings={
"connections_with_failover_max_tries": 1,
"connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000,
"connect_timeout": 1,
"send_timeout": 1,
},
)
assert "not allowed in configuration file" not in node6.query_and_get_error(
"SELECT * FROM remoteSecure('example01-01-1,example01-02-1', system, events)",
settings={"connections_with_failover_max_tries": 1, "connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout": 1})
settings={
"connections_with_failover_max_tries": 1,
"connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000,
"connect_timeout": 1,
"send_timeout": 1,
},
)
assert "not allowed in configuration file" not in node6.query_and_get_error(
"SELECT * FROM remote('example01-0{1,2}-1', system, events",
settings={"connections_with_failover_max_tries": 1, "connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout": 1})
settings={
"connections_with_failover_max_tries": 1,
"connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000,
"connect_timeout": 1,
"send_timeout": 1,
},
)
assert "not allowed in configuration file" not in node6.query_and_get_error(
"SELECT * FROM remote('example01-0{1,2}-{1|2}', system, events)",
settings={"connections_with_failover_max_tries": 1, "connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout": 1})
settings={
"connections_with_failover_max_tries": 1,
"connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000,
"connect_timeout": 1,
"send_timeout": 1,
},
)
assert "not allowed in configuration file" not in node6.query_and_get_error(
"SELECT * FROM remoteSecure('example01-{01..02}-{1|2}', system, events)",
settings={"connections_with_failover_max_tries": 1, "connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout": 1})
settings={
"connections_with_failover_max_tries": 1,
"connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000,
"connect_timeout": 1,
"send_timeout": 1,
},
)
assert "not allowed" in node6.query_and_get_error(
"SELECT * FROM remoteSecure('example01-01-1,example01-03-1', system, events)",
settings={"connections_with_failover_max_tries": 1, "connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout": 1})
assert "not allowed" in node6.query_and_get_error("SELECT * FROM remote('example01-01-{1|3}', system, events)",
settings={"connections_with_failover_max_tries": 1,
"connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000,
"connect_timeout": 1, "send_timeout": 1})
settings={
"connections_with_failover_max_tries": 1,
"connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000,
"connect_timeout": 1,
"send_timeout": 1,
},
)
assert "not allowed" in node6.query_and_get_error(
"SELECT * FROM remote('example01-01-{1|3}', system, events)",
settings={
"connections_with_failover_max_tries": 1,
"connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000,
"connect_timeout": 1,
"send_timeout": 1,
},
)
assert "not allowed" in node6.query_and_get_error(
"SELECT * FROM remoteSecure('example01-0{1,3}-1', system, metrics)",
settings={"connections_with_failover_max_tries": 1, "connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout": 1})
settings={
"connections_with_failover_max_tries": 1,
"connect_timeout_with_failover_ms": 1000,
"connect_timeout_with_failover_secure_ms": 1000,
"connect_timeout": 1,
"send_timeout": 1,
},
)
assert node6.query("SELECT * FROM remote('localhost', system, events)") != ""
assert node6.query("SELECT * FROM remoteSecure('localhost', system, metrics)") != ""
assert "URL \"localhost:800\" is not allowed in configuration file" in node6.query_and_get_error(
"SELECT * FROM remoteSecure('localhost:800', system, events)")
assert "URL \"localhost:800\" is not allowed in configuration file" in node6.query_and_get_error(
"SELECT * FROM remote('localhost:800', system, metrics)")
assert (
'URL "localhost:800" is not allowed in configuration file'
in node6.query_and_get_error(
"SELECT * FROM remoteSecure('localhost:800', system, events)"
)
)
assert (
'URL "localhost:800" is not allowed in configuration file'
in node6.query_and_get_error(
"SELECT * FROM remote('localhost:800', system, metrics)"
)
)


def test_redirect(start_cluster):
@@ -120,12 +262,17 @@ def test_redirect(start_cluster):
hdfs_api.write_data("/simple_storage", "1\t\n")
assert hdfs_api.read_data("/simple_storage") == "1\t\n"
node7.query(
"CREATE TABLE table_test_7_1 (word String) ENGINE=URL('http://hdfs1:50070/webhdfs/v1/simple_storage?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', CSV)")
assert "not allowed" in node7.query_and_get_error("SET max_http_get_redirects=1; SELECT * from table_test_7_1")
"CREATE TABLE table_test_7_1 (word String) ENGINE=URL('http://hdfs1:50070/webhdfs/v1/simple_storage?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', CSV)"
)
assert "not allowed" in node7.query_and_get_error(
"SET max_http_get_redirects=1; SELECT * from table_test_7_1"
)


def test_HDFS(start_cluster):
assert "not allowed" in node7.query_and_get_error(
"CREATE TABLE table_test_7_2 (word String) ENGINE=HDFS('http://hdfs1:50075/webhdfs/v1/simple_storage?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', 'CSV')")
"CREATE TABLE table_test_7_2 (word String) ENGINE=HDFS('http://hdfs1:50075/webhdfs/v1/simple_storage?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', 'CSV')"
)
assert "not allowed" in node7.query_and_get_error(
"SELECT * FROM hdfs('http://hdfs1:50075/webhdfs/v1/simple_storage?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', 'TSV', 'word String')")
"SELECT * FROM hdfs('http://hdfs1:50075/webhdfs/v1/simple_storage?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', 'TSV', 'word String')"
)

@@ -4,8 +4,7 @@ from helpers.cluster import ClickHouseCluster

cluster = ClickHouseCluster(__file__)

node1 = cluster.add_instance('node1',
main_configs=['configs/logs_config.xml'])
node1 = cluster.add_instance("node1", main_configs=["configs/logs_config.xml"])


@pytest.fixture(scope="module")
@@ -21,30 +20,60 @@ def started_cluster():
def test_alter_codec_pk(started_cluster):
try:
name = "test_alter_codec_pk"
node1.query("""
node1.query(
"""
CREATE TABLE {name} (id UInt64, value UInt64) Engine=MergeTree() ORDER BY id
""".format(name=name))
""".format(
name=name
)
)

node1.query("INSERT INTO {name} SELECT number, number * number from numbers(100)".format(name=name))
node1.query(
"INSERT INTO {name} SELECT number, number * number from numbers(100)".format(
name=name
)
)

node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt64 CODEC(NONE)".format(name=name))
node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt64 CODEC(Delta, LZ4)".format(name=name))
node1.query(
"ALTER TABLE {name} MODIFY COLUMN id UInt64 CODEC(NONE)".format(name=name)
)
node1.query(
"ALTER TABLE {name} MODIFY COLUMN id UInt64 CODEC(Delta, LZ4)".format(
name=name
)
)

assert node1.query("SELECT sum(id) FROM {name}".format(name=name)) == "4950\n"

with pytest.raises(QueryRuntimeException):
node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt32 CODEC(Delta, LZ4)".format(name=name))
node1.query(
"ALTER TABLE {name} MODIFY COLUMN id UInt32 CODEC(Delta, LZ4)".format(
name=name
)
)

node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt64 DEFAULT 3 CODEC(Delta, LZ4)".format(name=name))
node1.query(
"ALTER TABLE {name} MODIFY COLUMN id UInt64 DEFAULT 3 CODEC(Delta, LZ4)".format(
name=name
)
)

node1.query("INSERT INTO {name} (value) VALUES (1)".format(name=name))

assert node1.query("SELECT sum(id) FROM {name}".format(name=name)) == "4953\n"

with pytest.raises(QueryRuntimeException):
node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt64 ALIAS 3 CODEC(Delta, LZ4)".format(name=name))
node1.query(
"ALTER TABLE {name} MODIFY COLUMN id UInt64 ALIAS 3 CODEC(Delta, LZ4)".format(
name=name
)
)

node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt64 MATERIALIZED 3 CODEC(Delta, LZ4)".format(name=name))
node1.query(
"ALTER TABLE {name} MODIFY COLUMN id UInt64 MATERIALIZED 3 CODEC(Delta, LZ4)".format(
name=name
)
)

node1.query("INSERT INTO {name} (value) VALUES (1)".format(name=name))

@@ -61,28 +90,58 @@ def test_alter_codec_pk(started_cluster):
def test_alter_codec_index(started_cluster):
try:
name = "test_alter_codec_index"
node1.query("""
node1.query(
"""
CREATE TABLE {name} (`id` UInt64, value UInt64, INDEX id_index id TYPE minmax GRANULARITY 1) Engine=MergeTree() ORDER BY tuple()
""".format(name=name))
""".format(
name=name
)
)

node1.query("INSERT INTO {name} SELECT number, number * number from numbers(100)".format(name=name))
node1.query(
"INSERT INTO {name} SELECT number, number * number from numbers(100)".format(
name=name
)
)

node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt64 CODEC(NONE)".format(name=name))
node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt64 CODEC(Delta, LZ4)".format(name=name))
node1.query(
"ALTER TABLE {name} MODIFY COLUMN id UInt64 CODEC(NONE)".format(name=name)
)
node1.query(
"ALTER TABLE {name} MODIFY COLUMN id UInt64 CODEC(Delta, LZ4)".format(
name=name
)
)

with pytest.raises(QueryRuntimeException):
node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt32 CODEC(Delta, LZ4)".format(name=name))
node1.query(
"ALTER TABLE {name} MODIFY COLUMN id UInt32 CODEC(Delta, LZ4)".format(
name=name
)
)

node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt64 DEFAULT 3 CODEC(Delta, LZ4)".format(name=name))
node1.query(
"ALTER TABLE {name} MODIFY COLUMN id UInt64 DEFAULT 3 CODEC(Delta, LZ4)".format(
name=name
)
)

node1.query("INSERT INTO {name} (value) VALUES (1)".format(name=name))

assert node1.query("SELECT sum(id) FROM {name}".format(name=name)) == "4953\n"

with pytest.raises(QueryRuntimeException):
node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt64 ALIAS 3 CODEC(Delta, LZ4)".format(name=name))
node1.query(
"ALTER TABLE {name} MODIFY COLUMN id UInt64 ALIAS 3 CODEC(Delta, LZ4)".format(
name=name
)
)

node1.query("ALTER TABLE {name} MODIFY COLUMN id UInt64 MATERIALIZED 3 CODEC(Delta, LZ4)".format(name=name))
node1.query(
"ALTER TABLE {name} MODIFY COLUMN id UInt64 MATERIALIZED 3 CODEC(Delta, LZ4)".format(
name=name
)
)

node1.query("INSERT INTO {name} (value) VALUES (1)".format(name=name))


@@ -4,11 +4,18 @@ from helpers.cluster import ClickHouseCluster

cluster = ClickHouseCluster(__file__)

node1 = cluster.add_instance('node1', main_configs=['configs/remote_servers.xml'], with_zookeeper=True)
node2 = cluster.add_instance('node2', main_configs=['configs/remote_servers.xml'], with_zookeeper=True)
node3 = cluster.add_instance('node3', main_configs=['configs/remote_servers.xml'], with_zookeeper=True)
node4 = cluster.add_instance('node4', main_configs=['configs/remote_servers.xml'], with_zookeeper=True)

node1 = cluster.add_instance(
"node1", main_configs=["configs/remote_servers.xml"], with_zookeeper=True
)
node2 = cluster.add_instance(
"node2", main_configs=["configs/remote_servers.xml"], with_zookeeper=True
)
node3 = cluster.add_instance(
"node3", main_configs=["configs/remote_servers.xml"], with_zookeeper=True
)
node4 = cluster.add_instance(
"node4", main_configs=["configs/remote_servers.xml"], with_zookeeper=True
)


@pytest.fixture(scope="module")
@@ -17,19 +24,31 @@ def started_cluster():
cluster.start()

for node in [node1, node2]:
node.query_with_retry('''
node.query_with_retry(
"""
CREATE TABLE IF NOT EXISTS test_table_replicated(date Date, id UInt32, value Int32)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/sometable', '{replica}') ORDER BY id;
'''.format(replica=node.name))
node.query_with_retry('''CREATE TABLE IF NOT EXISTS test_table(date Date, id UInt32, value Int32) ENGINE=MergeTree ORDER BY id''')
""".format(
replica=node.name
)
)
node.query_with_retry(
"""CREATE TABLE IF NOT EXISTS test_table(date Date, id UInt32, value Int32) ENGINE=MergeTree ORDER BY id"""
)

for node in [node3, node4]:
node.query_with_retry('''
node.query_with_retry(
"""
CREATE TABLE IF NOT EXISTS test_table_replicated(date Date, id UInt32, value Int32)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/1/someotable', '{replica}') ORDER BY id;
'''.format(replica=node.name))
""".format(
replica=node.name
)
)

node.query_with_retry('''CREATE TABLE IF NOT EXISTS test_table(date Date, id UInt32, value Int32) ENGINE=MergeTree ORDER BY id''')
node.query_with_retry(
"""CREATE TABLE IF NOT EXISTS test_table(date Date, id UInt32, value Int32) ENGINE=MergeTree ORDER BY id"""
)

yield cluster

@@ -46,17 +65,23 @@ def test_alter_on_cluter_non_replicated(started_cluster):
assert node3.query("SELECT COUNT() FROM test_table") == "1\n"
assert node4.query("SELECT COUNT() FROM test_table") == "1\n"

node1.query("ALTER TABLE test_table ON CLUSTER 'test_cluster_mixed' MODIFY COLUMN date DateTime")
node1.query(
"ALTER TABLE test_table ON CLUSTER 'test_cluster_mixed' MODIFY COLUMN date DateTime"
)

assert node1.query("SELECT date FROM test_table") == '2019-10-01 00:00:00\n'
assert node2.query("SELECT date FROM test_table") == '2019-10-01 00:00:00\n'
assert node3.query("SELECT date FROM test_table") == '2019-10-01 00:00:00\n'
assert node4.query("SELECT date FROM test_table") == '2019-10-01 00:00:00\n'
assert node1.query("SELECT date FROM test_table") == "2019-10-01 00:00:00\n"
assert node2.query("SELECT date FROM test_table") == "2019-10-01 00:00:00\n"
assert node3.query("SELECT date FROM test_table") == "2019-10-01 00:00:00\n"
assert node4.query("SELECT date FROM test_table") == "2019-10-01 00:00:00\n"

node3.query("ALTER TABLE test_table ON CLUSTER 'test_cluster_mixed' MODIFY COLUMN value String")
node3.query(
"ALTER TABLE test_table ON CLUSTER 'test_cluster_mixed' MODIFY COLUMN value String"
)

for node in [node1, node2, node3, node4]:
node.query("INSERT INTO test_table VALUES(toDateTime('2019-10-02 00:00:00'), 2, 'Hello')")
node.query(
"INSERT INTO test_table VALUES(toDateTime('2019-10-02 00:00:00'), 2, 'Hello')"
)

assert node1.query("SELECT COUNT() FROM test_table") == "2\n"
assert node2.query("SELECT COUNT() FROM test_table") == "2\n"
@@ -66,22 +91,40 @@ def test_alter_on_cluter_non_replicated(started_cluster):

def test_alter_replicated_on_cluster(started_cluster):
for node in [node1, node3]:
node.query("INSERT INTO test_table_replicated VALUES(toDate('2019-10-01'), 1, 1)")
node.query(
"INSERT INTO test_table_replicated VALUES(toDate('2019-10-01'), 1, 1)"
)

for node in [node2, node4]:
node.query("SYSTEM SYNC REPLICA test_table_replicated", timeout=20)

node1.query("ALTER TABLE test_table_replicated ON CLUSTER 'test_cluster_mixed' MODIFY COLUMN date DateTime", settings={"replication_alter_partitions_sync": "2"})
node1.query(
"ALTER TABLE test_table_replicated ON CLUSTER 'test_cluster_mixed' MODIFY COLUMN date DateTime",
settings={"replication_alter_partitions_sync": "2"},
)

assert node1.query("SELECT date FROM test_table_replicated") == '2019-10-01 00:00:00\n'
assert node2.query("SELECT date FROM test_table_replicated") == '2019-10-01 00:00:00\n'
assert node3.query("SELECT date FROM test_table_replicated") == '2019-10-01 00:00:00\n'
assert node4.query("SELECT date FROM test_table_replicated") == '2019-10-01 00:00:00\n'
assert (
node1.query("SELECT date FROM test_table_replicated") == "2019-10-01 00:00:00\n"
)
assert (
node2.query("SELECT date FROM test_table_replicated") == "2019-10-01 00:00:00\n"
)
assert (
node3.query("SELECT date FROM test_table_replicated") == "2019-10-01 00:00:00\n"
)
assert (
node4.query("SELECT date FROM test_table_replicated") == "2019-10-01 00:00:00\n"
)

node3.query_with_retry("ALTER TABLE test_table_replicated ON CLUSTER 'test_cluster_mixed' MODIFY COLUMN value String", settings={"replication_alter_partitions_sync": "2"})
node3.query_with_retry(
"ALTER TABLE test_table_replicated ON CLUSTER 'test_cluster_mixed' MODIFY COLUMN value String",
settings={"replication_alter_partitions_sync": "2"},
)

for node in [node2, node4]:
node.query("INSERT INTO test_table_replicated VALUES(toDateTime('2019-10-02 00:00:00'), 2, 'Hello')")
node.query(
"INSERT INTO test_table_replicated VALUES(toDateTime('2019-10-02 00:00:00'), 2, 'Hello')"
)

for node in [node1, node3]:
node.query("SYSTEM SYNC REPLICA test_table_replicated", timeout=20)

@@ -3,7 +3,10 @@ from helpers.cluster import ClickHouseCluster

cluster = ClickHouseCluster(__file__)

node1 = cluster.add_instance('node1', user_configs=['configs/users.xml'], with_zookeeper=True)
node1 = cluster.add_instance(
"node1", user_configs=["configs/users.xml"], with_zookeeper=True
)


@pytest.fixture(scope="module")
def started_cluster():
@@ -13,24 +16,30 @@ def started_cluster():
finally:
cluster.shutdown()

def test_cast_keep_nullable(started_cluster):
setting = node1.query("SELECT value FROM system.settings WHERE name='cast_keep_nullable'")
assert(setting.strip() == "1")

result = node1.query("""
def test_cast_keep_nullable(started_cluster):
setting = node1.query(
"SELECT value FROM system.settings WHERE name='cast_keep_nullable'"
)
assert setting.strip() == "1"

result = node1.query(
"""
DROP TABLE IF EXISTS t;
CREATE TABLE t (x UInt64) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO t SELECT number FROM numbers(10);
SELECT * FROM t;
""")
assert(result.strip() == "0\n1\n2\n3\n4\n5\n6\n7\n8\n9")
"""
)
assert result.strip() == "0\n1\n2\n3\n4\n5\n6\n7\n8\n9"

error = node1.query_and_get_error("""
error = node1.query_and_get_error(
"""
SET mutations_sync = 1;
ALTER TABLE t UPDATE x = x % 3 = 0 ? NULL : x WHERE x % 2 = 1;
""")
assert("DB::Exception: Cannot convert NULL value to non-Nullable type" in error)
"""
)
assert "DB::Exception: Cannot convert NULL value to non-Nullable type" in error

result = node1.query("SELECT * FROM t;")
assert(result.strip() == "0\n1\n2\n3\n4\n5\n6\n7\n8\n9")

assert result.strip() == "0\n1\n2\n3\n4\n5\n6\n7\n8\n9"

@@ -6,8 +6,8 @@ from helpers.test_tools import assert_eq_with_retry

cluster = ClickHouseCluster(__file__)

node1 = cluster.add_instance('node1', with_zookeeper=True)
node2 = cluster.add_instance('node2', with_zookeeper=True)
node1 = cluster.add_instance("node1", with_zookeeper=True)
node2 = cluster.add_instance("node2", with_zookeeper=True)


@pytest.fixture(scope="module")
@@ -22,21 +22,25 @@ def started_cluster():


def test_replica_always_download(started_cluster):
node1.query_with_retry("""
node1.query_with_retry(
"""
CREATE TABLE IF NOT EXISTS test_table(
key UInt64,
value String
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_table/replicated', '1')
ORDER BY tuple()
""")
node2.query_with_retry("""
"""
)
node2.query_with_retry(
"""
CREATE TABLE IF NOT EXISTS test_table(
key UInt64,
value String
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_table/replicated', '2')
ORDER BY tuple()
SETTINGS always_fetch_merged_part=1
""")
"""
)

# Stop merges on single node
node1.query("SYSTEM STOP MERGES")
@@ -50,15 +54,29 @@ def test_replica_always_download(started_cluster):
time.sleep(5)

# Nothing is merged
assert node1.query("SELECT COUNT() FROM system.parts WHERE table = 'test_table' and active=1") == "10\n"
assert node2.query("SELECT COUNT() FROM system.parts WHERE table = 'test_table' and active=1") == "10\n"
assert (
node1.query(
"SELECT COUNT() FROM system.parts WHERE table = 'test_table' and active=1"
)
== "10\n"
)
assert (
node2.query(
"SELECT COUNT() FROM system.parts WHERE table = 'test_table' and active=1"
)
== "10\n"
)

node1.query("SYSTEM START MERGES")
node1.query("OPTIMIZE TABLE test_table")
node2.query("SYSTEM SYNC REPLICA test_table")

node1_parts = node1.query("SELECT COUNT() FROM system.parts WHERE table = 'test_table' and active=1").strip()
node2_parts = node2.query("SELECT COUNT() FROM system.parts WHERE table = 'test_table' and active=1").strip()
node1_parts = node1.query(
"SELECT COUNT() FROM system.parts WHERE table = 'test_table' and active=1"
).strip()
node2_parts = node2.query(
"SELECT COUNT() FROM system.parts WHERE table = 'test_table' and active=1"
).strip()

assert int(node1_parts) < 10
assert int(node2_parts) < 10

@@ -5,17 +5,19 @@ import pytest
from helpers.cluster import ClickHouseCluster

cluster = ClickHouseCluster(__file__)
node = cluster.add_instance('node', main_configs=['configs/config.xml'])
node = cluster.add_instance("node", main_configs=["configs/config.xml"])


@pytest.fixture(scope='module')
@pytest.fixture(scope="module")
def started_cluster():
try:
cluster.start()
node.query("""
node.query(
"""
create table t (number UInt64)
engine = Distributed(test_cluster_two_shards, system, numbers)
""")
"""
)
yield cluster

finally:
@@ -25,12 +27,15 @@ def started_cluster():
def test_filled_async_drain_connection_pool(started_cluster):
def execute_queries(_):
for _ in range(100):
node.query('select * from t where number = 0 limit 2', settings={
'sleep_in_receive_cancel_ms': int(10e6),
'max_execution_time': 5,
# decrease drain_timeout to make test more stable
# (another way is to increase max_execution_time, but this will make test slower)
'drain_timeout': 1,
})
node.query(
"select * from t where number = 0 limit 2",
settings={
"sleep_in_receive_cancel_ms": int(10e6),
"max_execution_time": 5,
# decrease drain_timeout to make test more stable
# (another way is to increase max_execution_time, but this will make test slower)
"drain_timeout": 1,
},
)

any(map(execute_queries, range(10)))

@@ -4,8 +4,11 @@ import pytest
from helpers.cluster import ClickHouseCluster

cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', with_zookeeper=True,
main_configs=['configs/asynchronous_metrics_update_period_s.xml'])
node1 = cluster.add_instance(
"node1",
with_zookeeper=True,
main_configs=["configs/asynchronous_metrics_update_period_s.xml"],
)


@pytest.fixture(scope="module")
@@ -27,20 +30,20 @@ def test_event_time_microseconds_field(started_cluster):
cluster.start()
node1.query("SET log_queries = 1;")
node1.query("CREATE DATABASE replica;")
query_create = '''CREATE TABLE replica.test
query_create = """CREATE TABLE replica.test
(
id Int64,
event_time DateTime
)
Engine=MergeTree()
PARTITION BY toYYYYMMDD(event_time)
ORDER BY id;'''
ORDER BY id;"""
time.sleep(2)
node1.query(query_create)
node1.query('''INSERT INTO replica.test VALUES (1, now())''')
node1.query("""INSERT INTO replica.test VALUES (1, now())""")
node1.query("SYSTEM FLUSH LOGS;")
# query assumes that the event_time field is accurate
equals_query = '''WITH (
equals_query = """WITH (
(
SELECT event_time_microseconds
FROM system.asynchronous_metric_log
@@ -53,7 +56,7 @@ def test_event_time_microseconds_field(started_cluster):
ORDER BY event_time DESC
LIMIT 1
) AS time)
SELECT if(dateDiff('second', toDateTime(time_with_microseconds), toDateTime(time)) = 0, 'ok', 'fail')'''
SELECT if(dateDiff('second', toDateTime(time_with_microseconds), toDateTime(time)) = 0, 'ok', 'fail')"""
assert "ok\n" in node1.query(equals_query)
finally:
cluster.shutdown()

@@ -5,21 +5,29 @@ from helpers.cluster import ClickHouseCluster
from helpers.network import PartitionManager

cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', main_configs=["configs/config.d/zookeeper_session_timeout.xml",
"configs/remote_servers.xml"], with_zookeeper=True)
node1 = cluster.add_instance(
"node1",
main_configs=[
"configs/config.d/zookeeper_session_timeout.xml",
"configs/remote_servers.xml",
],
with_zookeeper=True,
)


@pytest.fixture(scope="module")
def start_cluster():
try:
cluster.start()
node1.query("CREATE DATABASE zktest ENGINE=Ordinary;") # Different behaviour with Atomic
node1.query(
'''
"CREATE DATABASE zktest ENGINE=Ordinary;"
) # Different behaviour with Atomic
node1.query(
"""
CREATE TABLE zktest.atomic_drop_table (n UInt32)
ENGINE = ReplicatedMergeTree('/clickhouse/zktest/tables/atomic_drop_table', 'node1')
PARTITION BY n ORDER BY n
'''
"""
)
yield cluster
finally:
@@ -31,8 +39,10 @@ def test_atomic_delete_with_stopped_zookeeper(start_cluster):

with PartitionManager() as pm:
pm.drop_instance_zk_connections(node1)
error = node1.query_and_get_error("DROP TABLE zktest.atomic_drop_table") # Table won't drop
error = node1.query_and_get_error(
"DROP TABLE zktest.atomic_drop_table"
) # Table won't drop
assert error != ""

time.sleep(5)
assert '8192' in node1.query("select * from zktest.atomic_drop_table")
assert "8192" in node1.query("select * from zktest.atomic_drop_table")

@@ -3,7 +3,9 @@ import pytest
from helpers.cluster import ClickHouseCluster

cluster = ClickHouseCluster(__file__)
node = cluster.add_instance('node', main_configs=["configs/config.xml"], with_zookeeper=True)
node = cluster.add_instance(
"node", main_configs=["configs/config.xml"], with_zookeeper=True
)


@pytest.fixture(scope="module")
@@ -14,18 +16,35 @@ def started_cluster():
finally:
cluster.shutdown()


def create_force_drop_flag(node):
force_drop_flag_path = "/var/lib/clickhouse/flags/force_drop_table"
node.exec_in_container(["bash", "-c", "touch {} && chmod a=rw {}".format(force_drop_flag_path, force_drop_flag_path)], user="root")
node.exec_in_container(
[
"bash",
"-c",
"touch {} && chmod a=rw {}".format(
force_drop_flag_path, force_drop_flag_path
),
],
user="root",
)

@pytest.mark.parametrize("engine", ['Ordinary', 'Atomic'])

@pytest.mark.parametrize("engine", ["Ordinary", "Atomic"])
def test_attach_partition_with_large_destination(started_cluster, engine):
# Initialize
node.query("CREATE DATABASE db ENGINE={}".format(engine))
node.query("CREATE TABLE db.destination (n UInt64) ENGINE=ReplicatedMergeTree('/test/destination', 'r1') ORDER BY n PARTITION BY n % 2")
node.query("CREATE TABLE db.source_1 (n UInt64) ENGINE=ReplicatedMergeTree('/test/source_1', 'r1') ORDER BY n PARTITION BY n % 2")
node.query(
"CREATE TABLE db.destination (n UInt64) ENGINE=ReplicatedMergeTree('/test/destination', 'r1') ORDER BY n PARTITION BY n % 2"
)
node.query(
"CREATE TABLE db.source_1 (n UInt64) ENGINE=ReplicatedMergeTree('/test/source_1', 'r1') ORDER BY n PARTITION BY n % 2"
)
node.query("INSERT INTO db.source_1 VALUES (1), (2), (3), (4)")
node.query("CREATE TABLE db.source_2 (n UInt64) ENGINE=ReplicatedMergeTree('/test/source_2', 'r1') ORDER BY n PARTITION BY n % 2")
node.query(
"CREATE TABLE db.source_2 (n UInt64) ENGINE=ReplicatedMergeTree('/test/source_2', 'r1') ORDER BY n PARTITION BY n % 2"
)
node.query("INSERT INTO db.source_2 VALUES (5), (6), (7), (8)")

# Attach partition when destination partition is empty
@@ -33,7 +52,9 @@ def test_attach_partition_with_large_destination(started_cluster, engine):
assert node.query("SELECT n FROM db.destination ORDER BY n") == "2\n4\n"

# REPLACE PARTITION should still respect max_partition_size_to_drop
assert node.query_and_get_error("ALTER TABLE db.destination REPLACE PARTITION 0 FROM db.source_2")
assert node.query_and_get_error(
"ALTER TABLE db.destination REPLACE PARTITION 0 FROM db.source_2"
)
assert node.query("SELECT n FROM db.destination ORDER BY n") == "2\n4\n"

# Attach partition when destination partition is larger than max_partition_size_to_drop
@@ -47,4 +68,4 @@ def test_attach_partition_with_large_destination(started_cluster, engine):
node.query("DROP TABLE db.source_2 SYNC")
create_force_drop_flag(node)
node.query("DROP TABLE db.destination SYNC")
node.query("DROP DATABASE db")
node.query("DROP DATABASE db")

@@ -3,7 +3,8 @@ from helpers.cluster import ClickHouseCluster

cluster = ClickHouseCluster(__file__)

node1 = cluster.add_instance('node1')
node1 = cluster.add_instance("node1")


@pytest.fixture(scope="module")
def start_cluster():
@@ -17,9 +18,12 @@ def start_cluster():

def test_attach_without_checksums(start_cluster):
node1.query(
"CREATE TABLE test (date Date, key Int32, value String) Engine=MergeTree ORDER BY key PARTITION by date")
"CREATE TABLE test (date Date, key Int32, value String) Engine=MergeTree ORDER BY key PARTITION by date"
)

node1.query("INSERT INTO test SELECT toDate('2019-10-01'), number, toString(number) FROM numbers(100)")
node1.query(
"INSERT INTO test SELECT toDate('2019-10-01'), number, toString(number) FROM numbers(100)"
)

assert node1.query("SELECT COUNT() FROM test WHERE key % 10 == 0") == "10\n"

@@ -30,15 +34,27 @@ def test_attach_without_checksums(start_cluster):

# to be sure output not empty
node1.exec_in_container(
['bash', '-c', 'find /var/lib/clickhouse/data/default/test/detached -name "checksums.txt" | grep -e ".*" '],
privileged=True, user='root')
[
"bash",
"-c",
'find /var/lib/clickhouse/data/default/test/detached -name "checksums.txt" | grep -e ".*" ',
],
privileged=True,
user="root",
)

node1.exec_in_container(
['bash', '-c', 'find /var/lib/clickhouse/data/default/test/detached -name "checksums.txt" -delete'],
privileged=True, user='root')
[
"bash",
"-c",
'find /var/lib/clickhouse/data/default/test/detached -name "checksums.txt" -delete',
],
privileged=True,
user="root",
)

node1.query("ALTER TABLE test ATTACH PARTITION '2019-10-01'")

assert node1.query("SELECT COUNT() FROM test WHERE key % 10 == 0") == "10\n"
assert node1.query("SELECT COUNT() FROM test") == "100\n"
node1.query("DROP TABLE test")
node1.query("DROP TABLE test")

@@ -7,19 +7,25 @@ from helpers.test_tools import assert_eq_with_retry
from helpers.network import PartitionManager
from helpers.corrupt_part_data_on_disk import corrupt_part_data_by_path


def fill_node(node):
node.query_with_retry(
'''
"""
CREATE TABLE IF NOT EXISTS test(n UInt32)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/test', '{replica}')
ORDER BY n PARTITION BY n % 10;
'''.format(replica=node.name))
""".format(
replica=node.name
)
)


cluster = ClickHouseCluster(__file__)

node_1 = cluster.add_instance('replica1', with_zookeeper=True)
node_2 = cluster.add_instance('replica2', with_zookeeper=True)
node_3 = cluster.add_instance('replica3', with_zookeeper=True)
node_1 = cluster.add_instance("replica1", with_zookeeper=True)
node_2 = cluster.add_instance("replica2", with_zookeeper=True)
node_3 = cluster.add_instance("replica3", with_zookeeper=True)


@pytest.fixture(scope="module")
def start_cluster():
@@ -36,27 +42,42 @@ def start_cluster():
finally:
cluster.shutdown()


def check_data(nodes, detached_parts):
for node in nodes:
print("> Replication queue for", node.name, "\n> table\treplica_name\tsource_replica\ttype\tposition\n",
node.query_with_retry("SELECT table, replica_name, source_replica, type, position FROM system.replication_queue"))
print(
"> Replication queue for",
node.name,
"\n> table\treplica_name\tsource_replica\ttype\tposition\n",
node.query_with_retry(
"SELECT table, replica_name, source_replica, type, position FROM system.replication_queue"
),
)

node.query_with_retry("SYSTEM SYNC REPLICA test")

print("> Checking data integrity for", node.name)

for i in range(10):
assert_eq_with_retry(node, "SELECT count() FROM test WHERE n % 10 == " + str(i),
"0\n" if i in detached_parts else "10\n")
assert_eq_with_retry(
node,
"SELECT count() FROM test WHERE n % 10 == " + str(i),
"0\n" if i in detached_parts else "10\n",
)

assert_eq_with_retry(node, "SELECT count() FROM system.parts WHERE table='test'",
str(10 - len(detached_parts)) + "\n")
assert_eq_with_retry(
node,
"SELECT count() FROM system.parts WHERE table='test'",
str(10 - len(detached_parts)) + "\n",
)

res: str = node.query("SELECT * FROM test ORDER BY n")

for other in nodes:
if other != node:
logging.debug(f"> Checking data consistency, {other.name} vs {node.name}")
logging.debug(
f"> Checking data consistency, {other.name} vs {node.name}"
)
assert_eq_with_retry(other, "SELECT * FROM test ORDER BY n", res)


@@ -83,7 +104,6 @@ def test_attach_without_fetching(start_cluster):
# files missing.
node_1.query("ALTER TABLE test DETACH PARTITION 2")


check_data([node_1, node_2], detached_parts=[0, 1, 2])

# 2. Create the third replica
@@ -94,14 +114,28 @@ def test_attach_without_fetching(start_cluster):
# Replica 2 should also download the data from 1 as the checksums won't match.
logging.debug("Checking attach with corrupted part data with files missing")

to_delete = node_2.exec_in_container(['bash', '-c',
'cd {p} && ls *.bin'.format(
p="/var/lib/clickhouse/data/default/test/detached/2_0_0_0")], privileged=True)
to_delete = node_2.exec_in_container(
[
"bash",
"-c",
"cd {p} && ls *.bin".format(
p="/var/lib/clickhouse/data/default/test/detached/2_0_0_0"
),
],
privileged=True,
)
logging.debug(f"Before deleting: {to_delete}")

node_2.exec_in_container(['bash', '-c',
'cd {p} && rm -fr *.bin'.format(
p="/var/lib/clickhouse/data/default/test/detached/2_0_0_0")], privileged=True)
node_2.exec_in_container(
[
"bash",
"-c",
"cd {p} && rm -fr *.bin".format(
p="/var/lib/clickhouse/data/default/test/detached/2_0_0_0"
),
],
privileged=True,
)

node_1.query("ALTER TABLE test ATTACH PARTITION 2")
check_data([node_1, node_2, node_3], detached_parts=[0, 1])
@@ -111,7 +145,9 @@ def test_attach_without_fetching(start_cluster):
# Replica 2 should also download the data from 1 as the checksums won't match.
print("Checking attach with corrupted part data with all of the files present")

corrupt_part_data_by_path(node_2, "/var/lib/clickhouse/data/default/test/detached/1_0_0_0")
corrupt_part_data_by_path(
node_2, "/var/lib/clickhouse/data/default/test/detached/1_0_0_0"
)

node_1.query("ALTER TABLE test ATTACH PARTITION 1")
check_data([node_1, node_2, node_3], detached_parts=[0])
@@ -123,8 +159,8 @@ def test_attach_without_fetching(start_cluster):

with PartitionManager() as pm:
# If something goes wrong and replica 2 wants to fetch data, the test will fail.
pm.partition_instances(node_2, node_1, action='REJECT --reject-with tcp-reset')
pm.partition_instances(node_1, node_3, action='REJECT --reject-with tcp-reset')
pm.partition_instances(node_2, node_1, action="REJECT --reject-with tcp-reset")
pm.partition_instances(node_1, node_3, action="REJECT --reject-with tcp-reset")

node_1.query("ALTER TABLE test ATTACH PART '0_0_0_0'")


@@ -2,7 +2,7 @@ import pytest
from helpers.cluster import ClickHouseCluster

cluster = ClickHouseCluster(__file__)
instance = cluster.add_instance('instance')
instance = cluster.add_instance("instance")


@pytest.fixture(scope="module", autouse=True)
@@ -20,18 +20,30 @@ def setup_nodes():


def test_authentication_pass():
assert instance.query("SELECT currentUser()", user='sasha') == 'sasha\n'
assert instance.query("SELECT currentUser()", user='masha', password='qwerty') == 'masha\n'
assert instance.query("SELECT currentUser()", user="sasha") == "sasha\n"
assert (
instance.query("SELECT currentUser()", user="masha", password="qwerty")
== "masha\n"
)

# 'no_password' authentication type allows to login with any password.
assert instance.query("SELECT currentUser()", user='sasha', password='something') == 'sasha\n'
assert instance.query("SELECT currentUser()", user='sasha', password='something2') == 'sasha\n'
assert (
instance.query("SELECT currentUser()", user="sasha", password="something")
== "sasha\n"
)
assert (
instance.query("SELECT currentUser()", user="sasha", password="something2")
== "sasha\n"
)


def test_authentication_fail():
# User doesn't exist.
assert "vasya: Authentication failed" in instance.query_and_get_error("SELECT currentUser()", user='vasya')
assert "vasya: Authentication failed" in instance.query_and_get_error(
"SELECT currentUser()", user="vasya"
)

# Wrong password.
assert "masha: Authentication failed" in instance.query_and_get_error("SELECT currentUser()", user='masha',
password='123')
assert "masha: Authentication failed" in instance.query_and_get_error(
"SELECT currentUser()", user="masha", password="123"
)

@ -17,12 +17,20 @@ CLUSTER_NAME = "test_cluster"
def cluster():
try:
cluster = ClickHouseCluster(__file__)
cluster.add_instance(NODE1, main_configs=["configs/config.d/storage_conf.xml"], macros={'replica': '1'},
with_azurite=True,
with_zookeeper=True)
cluster.add_instance(NODE2, main_configs=["configs/config.d/storage_conf.xml"], macros={'replica': '2'},
with_azurite=True,
with_zookeeper=True)
cluster.add_instance(
NODE1,
main_configs=["configs/config.d/storage_conf.xml"],
macros={"replica": "1"},
with_azurite=True,
with_zookeeper=True,
)
cluster.add_instance(
NODE2,
main_configs=["configs/config.d/storage_conf.xml"],
macros={"replica": "2"},
with_azurite=True,
with_zookeeper=True,
)
logging.info("Starting cluster...")
cluster.start()
logging.info("Cluster started")
@ -53,7 +61,10 @@ def create_table(node, table_name, replica, **additional_settings):


def get_large_objects_count(blob_container_client, large_size_threshold=100):
return sum(blob['size'] > large_size_threshold for blob in blob_container_client.list_blobs())
return sum(
blob["size"] > large_size_threshold
for blob in blob_container_client.list_blobs()
)


def test_zero_copy_replication(cluster):
@ -61,15 +72,21 @@ def test_zero_copy_replication(cluster):
node2 = cluster.instances[NODE2]
create_table(node1, TABLE_NAME, 1)

blob_container_client = cluster.blob_service_client.get_container_client(CONTAINER_NAME)
blob_container_client = cluster.blob_service_client.get_container_client(
CONTAINER_NAME
)

values1 = "(0,'data'),(1,'data')"
values2 = "(2,'data'),(3,'data')"

node1.query(f"INSERT INTO {TABLE_NAME} VALUES {values1}")
node2.query(f"SYSTEM SYNC REPLICA {TABLE_NAME}")
assert node1.query(f"SELECT * FROM {TABLE_NAME} order by id FORMAT Values") == values1
assert node2.query(f"SELECT * FROM {TABLE_NAME} order by id FORMAT Values") == values1
assert (
node1.query(f"SELECT * FROM {TABLE_NAME} order by id FORMAT Values") == values1
)
assert (
node2.query(f"SELECT * FROM {TABLE_NAME} order by id FORMAT Values") == values1
)

# Based on version 21.x - should be only one file with size 100+ (checksums.txt), used by both nodes
assert get_large_objects_count(blob_container_client) == 1
@ -77,7 +94,13 @@ def test_zero_copy_replication(cluster):
node2.query(f"INSERT INTO {TABLE_NAME} VALUES {values2}")
node1.query(f"SYSTEM SYNC REPLICA {TABLE_NAME}")

assert node2.query(f"SELECT * FROM {TABLE_NAME} order by id FORMAT Values") == values1 + "," + values2
assert node1.query(f"SELECT * FROM {TABLE_NAME} order by id FORMAT Values") == values1 + "," + values2
assert (
node2.query(f"SELECT * FROM {TABLE_NAME} order by id FORMAT Values")
== values1 + "," + values2
)
assert (
node1.query(f"SELECT * FROM {TABLE_NAME} order by id FORMAT Values")
== values1 + "," + values2
)

assert get_large_objects_count(blob_container_client) == 2

@ -6,25 +6,35 @@ from helpers.cluster import ClickHouseCluster
from helpers.test_tools import TSV

cluster = ClickHouseCluster(__file__)
instance = cluster.add_instance('node')
path_to_data = '/var/lib/clickhouse/'
instance = cluster.add_instance("node")
path_to_data = "/var/lib/clickhouse/"


@pytest.fixture(scope="module")
def started_cluster():
try:
cluster.start()
instance.query('CREATE DATABASE test ENGINE = Ordinary') # Different path in shadow/ with Atomic
instance.query(
"CREATE DATABASE test ENGINE = Ordinary"
) # Different path in shadow/ with Atomic
instance.query("DROP TABLE IF EXISTS test.tbl")
instance.query("CREATE TABLE test.tbl (p Date, k Int8) ENGINE = MergeTree PARTITION BY toYYYYMM(p) ORDER BY p")
instance.query(
"CREATE TABLE test.tbl (p Date, k Int8) ENGINE = MergeTree PARTITION BY toYYYYMM(p) ORDER BY p"
)
for i in range(1, 4):
instance.query('INSERT INTO test.tbl (p, k) VALUES(toDate({}), {})'.format(i, i))
instance.query(
"INSERT INTO test.tbl (p, k) VALUES(toDate({}), {})".format(i, i)
)
for i in range(31, 34):
instance.query('INSERT INTO test.tbl (p, k) VALUES(toDate({}), {})'.format(i, i))
instance.query(
"INSERT INTO test.tbl (p, k) VALUES(toDate({}), {})".format(i, i)
)

expected = TSV('1970-01-02\t1\n1970-01-03\t2\n1970-01-04\t3\n1970-02-01\t31\n1970-02-02\t32\n1970-02-03\t33')
expected = TSV(
"1970-01-02\t1\n1970-01-03\t2\n1970-01-04\t3\n1970-02-01\t31\n1970-02-02\t32\n1970-02-03\t33"
)
res = instance.query("SELECT * FROM test.tbl ORDER BY p")
assert (TSV(res) == expected)
assert TSV(res) == expected

instance.query("ALTER TABLE test.tbl FREEZE")

@ -33,21 +43,24 @@ def started_cluster():
finally:
cluster.shutdown()


def get_last_backup_path(instance, database, table):
fp_increment = os.path.join(path_to_data, 'shadow/increment.txt')
increment = instance.exec_in_container(['cat', fp_increment]).strip()
return os.path.join(path_to_data, 'shadow', increment, 'data', database, table)
fp_increment = os.path.join(path_to_data, "shadow/increment.txt")
increment = instance.exec_in_container(["cat", fp_increment]).strip()
return os.path.join(path_to_data, "shadow", increment, "data", database, table)


def copy_backup_to_detached(instance, database, src_table, dst_table):
fp_backup = os.path.join(path_to_data, 'shadow', '*', 'data', database, src_table)
fp_detached = os.path.join(path_to_data, 'data', database, dst_table, 'detached')
logging.debug(f'copy from {fp_backup} to {fp_detached}')
instance.exec_in_container(['bash', '-c', f'cp -r {fp_backup} -T {fp_detached}'])
fp_backup = os.path.join(path_to_data, "shadow", "*", "data", database, src_table)
fp_detached = os.path.join(path_to_data, "data", database, dst_table, "detached")
logging.debug(f"copy from {fp_backup} to {fp_detached}")
instance.exec_in_container(["bash", "-c", f"cp -r {fp_backup} -T {fp_detached}"])


def test_restore(started_cluster):
instance.query("CREATE TABLE test.tbl1 AS test.tbl")

copy_backup_to_detached(started_cluster.instances['node'], 'test', 'tbl', 'tbl1')
copy_backup_to_detached(started_cluster.instances["node"], "test", "tbl", "tbl1")

# The data_version of parts to be attached are larger than the newly created table's data_version.
instance.query("ALTER TABLE test.tbl1 ATTACH PARTITION 197001")
@ -55,17 +68,21 @@ def test_restore(started_cluster):
instance.query("SELECT sleep(2)")

# Validate the attached parts are identical to the backup.
expected = TSV('1970-01-02\t1\n1970-01-03\t2\n1970-01-04\t3\n1970-02-01\t31\n1970-02-02\t32\n1970-02-03\t33')
expected = TSV(
"1970-01-02\t1\n1970-01-03\t2\n1970-01-04\t3\n1970-02-01\t31\n1970-02-02\t32\n1970-02-03\t33"
)
res = instance.query("SELECT * FROM test.tbl1 ORDER BY p")
assert (TSV(res) == expected)
assert TSV(res) == expected

instance.query("ALTER TABLE test.tbl1 UPDATE k=10 WHERE 1")
instance.query("SELECT sleep(2)")

# Validate mutation has been applied to all attached parts.
expected = TSV('1970-01-02\t10\n1970-01-03\t10\n1970-01-04\t10\n1970-02-01\t10\n1970-02-02\t10\n1970-02-03\t10')
expected = TSV(
"1970-01-02\t10\n1970-01-03\t10\n1970-01-04\t10\n1970-02-01\t10\n1970-02-02\t10\n1970-02-03\t10"
)
res = instance.query("SELECT * FROM test.tbl1 ORDER BY p")
assert (TSV(res) == expected)
assert TSV(res) == expected

instance.query("DROP TABLE IF EXISTS test.tbl1")

@ -73,15 +90,19 @@ def test_restore(started_cluster):
def test_attach_partition(started_cluster):
instance.query("CREATE TABLE test.tbl2 AS test.tbl")
for i in range(3, 5):
instance.query('INSERT INTO test.tbl2(p, k) VALUES(toDate({}), {})'.format(i, i))
instance.query(
"INSERT INTO test.tbl2(p, k) VALUES(toDate({}), {})".format(i, i)
)
for i in range(33, 35):
instance.query('INSERT INTO test.tbl2(p, k) VALUES(toDate({}), {})'.format(i, i))
instance.query(
"INSERT INTO test.tbl2(p, k) VALUES(toDate({}), {})".format(i, i)
)

expected = TSV('1970-01-04\t3\n1970-01-05\t4\n1970-02-03\t33\n1970-02-04\t34')
expected = TSV("1970-01-04\t3\n1970-01-05\t4\n1970-02-03\t33\n1970-02-04\t34")
res = instance.query("SELECT * FROM test.tbl2 ORDER BY p")
assert (TSV(res) == expected)
assert TSV(res) == expected

copy_backup_to_detached(started_cluster.instances['node'], 'test', 'tbl', 'tbl2')
copy_backup_to_detached(started_cluster.instances["node"], "test", "tbl", "tbl2")

# The data_version of parts to be attached
# - may be less than, equal to or larger than the current table's data_version.
@ -91,18 +112,20 @@ def test_attach_partition(started_cluster):
instance.query("SELECT sleep(2)")

expected = TSV(
'1970-01-02\t1\n1970-01-03\t2\n1970-01-04\t3\n1970-01-04\t3\n1970-01-05\t4\n1970-02-01\t31\n1970-02-02\t32\n1970-02-03\t33\n1970-02-03\t33\n1970-02-04\t34')
"1970-01-02\t1\n1970-01-03\t2\n1970-01-04\t3\n1970-01-04\t3\n1970-01-05\t4\n1970-02-01\t31\n1970-02-02\t32\n1970-02-03\t33\n1970-02-03\t33\n1970-02-04\t34"
)
res = instance.query("SELECT * FROM test.tbl2 ORDER BY p")
assert (TSV(res) == expected)
assert TSV(res) == expected

instance.query("ALTER TABLE test.tbl2 UPDATE k=10 WHERE 1")
instance.query("SELECT sleep(2)")

# Validate mutation has been applied to all attached parts.
expected = TSV(
'1970-01-02\t10\n1970-01-03\t10\n1970-01-04\t10\n1970-01-04\t10\n1970-01-05\t10\n1970-02-01\t10\n1970-02-02\t10\n1970-02-03\t10\n1970-02-03\t10\n1970-02-04\t10')
"1970-01-02\t10\n1970-01-03\t10\n1970-01-04\t10\n1970-01-04\t10\n1970-01-05\t10\n1970-02-01\t10\n1970-02-02\t10\n1970-02-03\t10\n1970-02-03\t10\n1970-02-04\t10"
)
res = instance.query("SELECT * FROM test.tbl2 ORDER BY p")
assert (TSV(res) == expected)
assert TSV(res) == expected

instance.query("DROP TABLE IF EXISTS test.tbl2")

@ -110,15 +133,19 @@ def test_attach_partition(started_cluster):
def test_replace_partition(started_cluster):
instance.query("CREATE TABLE test.tbl3 AS test.tbl")
for i in range(3, 5):
instance.query('INSERT INTO test.tbl3(p, k) VALUES(toDate({}), {})'.format(i, i))
instance.query(
"INSERT INTO test.tbl3(p, k) VALUES(toDate({}), {})".format(i, i)
)
for i in range(33, 35):
instance.query('INSERT INTO test.tbl3(p, k) VALUES(toDate({}), {})'.format(i, i))
instance.query(
"INSERT INTO test.tbl3(p, k) VALUES(toDate({}), {})".format(i, i)
)

expected = TSV('1970-01-04\t3\n1970-01-05\t4\n1970-02-03\t33\n1970-02-04\t34')
expected = TSV("1970-01-04\t3\n1970-01-05\t4\n1970-02-03\t33\n1970-02-04\t34")
res = instance.query("SELECT * FROM test.tbl3 ORDER BY p")
assert (TSV(res) == expected)
assert TSV(res) == expected

copy_backup_to_detached(started_cluster.instances['node'], 'test', 'tbl', 'tbl3')
copy_backup_to_detached(started_cluster.instances["node"], "test", "tbl", "tbl3")

# The data_version of parts to be copied
# - may be less than, equal to or larger than the current table data_version.
@ -126,35 +153,56 @@ def test_replace_partition(started_cluster):
instance.query("ALTER TABLE test.tbl3 REPLACE PARTITION 197002 FROM test.tbl")
instance.query("SELECT sleep(2)")

expected = TSV('1970-01-04\t3\n1970-01-05\t4\n1970-02-01\t31\n1970-02-02\t32\n1970-02-03\t33')
expected = TSV(
"1970-01-04\t3\n1970-01-05\t4\n1970-02-01\t31\n1970-02-02\t32\n1970-02-03\t33"
)
res = instance.query("SELECT * FROM test.tbl3 ORDER BY p")
assert (TSV(res) == expected)
assert TSV(res) == expected

instance.query("ALTER TABLE test.tbl3 UPDATE k=10 WHERE 1")
instance.query("SELECT sleep(2)")

# Validate mutation has been applied to all copied parts.
expected = TSV('1970-01-04\t10\n1970-01-05\t10\n1970-02-01\t10\n1970-02-02\t10\n1970-02-03\t10')
expected = TSV(
"1970-01-04\t10\n1970-01-05\t10\n1970-02-01\t10\n1970-02-02\t10\n1970-02-03\t10"
)
res = instance.query("SELECT * FROM test.tbl3 ORDER BY p")
assert (TSV(res) == expected)
assert TSV(res) == expected

instance.query("DROP TABLE IF EXISTS test.tbl3")

def test_freeze_in_memory(started_cluster):
instance.query("CREATE TABLE test.t_in_memory(a UInt32, s String) ENGINE = MergeTree ORDER BY a SETTINGS min_rows_for_compact_part = 1000")
instance.query(
"CREATE TABLE test.t_in_memory(a UInt32, s String) ENGINE = MergeTree ORDER BY a SETTINGS min_rows_for_compact_part = 1000"
)
instance.query("INSERT INTO test.t_in_memory VALUES (1, 'a')")
instance.query("ALTER TABLE test.t_in_memory FREEZE")

fp_backup = get_last_backup_path(started_cluster.instances['node'], 'test', 't_in_memory')
part_path = fp_backup + '/all_1_1_0/'
fp_backup = get_last_backup_path(
started_cluster.instances["node"], "test", "t_in_memory"
)
part_path = fp_backup + "/all_1_1_0/"

assert TSV(instance.query("SELECT part_type, is_frozen FROM system.parts WHERE database = 'test' AND table = 't_in_memory'")) == TSV("InMemory\t1\n")
instance.exec_in_container(['test', '-f', part_path + '/data.bin'])
assert instance.exec_in_container(['cat', part_path + '/count.txt']).strip() == '1'
assert TSV(
instance.query(
"SELECT part_type, is_frozen FROM system.parts WHERE database = 'test' AND table = 't_in_memory'"
)
) == TSV("InMemory\t1\n")
instance.exec_in_container(["test", "-f", part_path + "/data.bin"])
assert instance.exec_in_container(["cat", part_path + "/count.txt"]).strip() == "1"

instance.query("CREATE TABLE test.t_in_memory_2(a UInt32, s String) ENGINE = MergeTree ORDER BY a")
copy_backup_to_detached(started_cluster.instances['node'], 'test', 't_in_memory', 't_in_memory_2')
instance.query(
"CREATE TABLE test.t_in_memory_2(a UInt32, s String) ENGINE = MergeTree ORDER BY a"
)
copy_backup_to_detached(
started_cluster.instances["node"], "test", "t_in_memory", "t_in_memory_2"
)

instance.query("ALTER TABLE test.t_in_memory_2 ATTACH PARTITION ID 'all'")
assert TSV(instance.query("SELECT part_type FROM system.parts WHERE database = 'test' AND table = 't_in_memory_2'")) == TSV("Compact\n")
assert TSV(
instance.query(
"SELECT part_type FROM system.parts WHERE database = 'test' AND table = 't_in_memory_2'"
)
) == TSV("Compact\n")
assert TSV(instance.query("SELECT a, s FROM test.t_in_memory_2")) == TSV("1\ta\n")

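A note for readers skimming the hunks above: each one applies the same mechanical change, black's reflow of statements longer than its default 88-character line length into a parenthesized, one-item-per-line form with double quotes and trailing commas. The short Python sketch below illustrates that pattern in isolation; it is not part of this commit, and the run_query helper is an illustrative stand-in for the test clusters' query method.

def run_query(node, sql, user=None, password=None):
    # Placeholder for ClickHouseInstance.query in the tests above; echoes the user name.
    return f"{user}\n"

# Before formatting (one long line, single quotes):
# assert run_query(None, 'SELECT currentUser()', user='masha', password='qwerty') == 'masha\n'

# After black (double quotes, wrapped at the 88-column limit):
assert (
    run_query(None, "SELECT currentUser()", user="masha", password="qwerty")
    == "masha\n"
)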
Some files were not shown because too many files have changed in this diff.