diff --git a/docker/test/stateful/s3downloader b/docker/test/stateful/s3downloader index b1302877d6a..96f2aa96dd5 100755 --- a/docker/test/stateful/s3downloader +++ b/docker/test/stateful/s3downloader @@ -10,31 +10,38 @@ import requests import tempfile -DEFAULT_URL = 'https://clickhouse-datasets.s3.amazonaws.com' +DEFAULT_URL = "https://clickhouse-datasets.s3.amazonaws.com" AVAILABLE_DATASETS = { - 'hits': 'hits_v1.tar', - 'visits': 'visits_v1.tar', + "hits": "hits_v1.tar", + "visits": "visits_v1.tar", } RETRIES_COUNT = 5 + def _get_temp_file_name(): - return os.path.join(tempfile._get_default_tempdir(), next(tempfile._get_candidate_names())) + return os.path.join( + tempfile._get_default_tempdir(), next(tempfile._get_candidate_names()) + ) + def build_url(base_url, dataset): - return os.path.join(base_url, dataset, 'partitions', AVAILABLE_DATASETS[dataset]) + return os.path.join(base_url, dataset, "partitions", AVAILABLE_DATASETS[dataset]) + def dowload_with_progress(url, path): logging.info("Downloading from %s to temp path %s", url, path) for i in range(RETRIES_COUNT): try: - with open(path, 'wb') as f: + with open(path, "wb") as f: response = requests.get(url, stream=True) response.raise_for_status() - total_length = response.headers.get('content-length') + total_length = response.headers.get("content-length") if total_length is None or int(total_length) == 0: - logging.info("No content-length, will download file without progress") + logging.info( + "No content-length, will download file without progress" + ) f.write(response.content) else: dl = 0 @@ -46,7 +53,11 @@ def dowload_with_progress(url, path): if sys.stdout.isatty(): done = int(50 * dl / total_length) percent = int(100 * float(dl) / total_length) - sys.stdout.write("\r[{}{}] {}%".format('=' * done, ' ' * (50-done), percent)) + sys.stdout.write( + "\r[{}{}] {}%".format( + "=" * done, " " * (50 - done), percent + ) + ) sys.stdout.flush() break except Exception as ex: @@ -56,14 +67,21 @@ def dowload_with_progress(url, path): if os.path.exists(path): os.remove(path) else: - raise Exception("Cannot download dataset from {}, all retries exceeded".format(url)) + raise Exception( + "Cannot download dataset from {}, all retries exceeded".format(url) + ) sys.stdout.write("\n") logging.info("Downloading finished") + def unpack_to_clickhouse_directory(tar_path, clickhouse_path): - logging.info("Will unpack data from temp path %s to clickhouse db %s", tar_path, clickhouse_path) - with tarfile.open(tar_path, 'r') as comp_file: + logging.info( + "Will unpack data from temp path %s to clickhouse db %s", + tar_path, + clickhouse_path, + ) + with tarfile.open(tar_path, "r") as comp_file: comp_file.extractall(path=clickhouse_path) logging.info("Unpack finished") @@ -72,15 +90,21 @@ if __name__ == "__main__": logging.basicConfig(level=logging.INFO) parser = argparse.ArgumentParser( - description="Simple tool for dowloading datasets for clickhouse from S3") + description="Simple tool for dowloading datasets for clickhouse from S3" + ) - parser.add_argument('--dataset-names', required=True, nargs='+', choices=list(AVAILABLE_DATASETS.keys())) - parser.add_argument('--url-prefix', default=DEFAULT_URL) - parser.add_argument('--clickhouse-data-path', default='/var/lib/clickhouse/') + parser.add_argument( + "--dataset-names", + required=True, + nargs="+", + choices=list(AVAILABLE_DATASETS.keys()), + ) + parser.add_argument("--url-prefix", default=DEFAULT_URL) + parser.add_argument("--clickhouse-data-path", default="/var/lib/clickhouse/") args = 
parser.parse_args() datasets = args.dataset_names - logging.info("Will fetch following datasets: %s", ', '.join(datasets)) + logging.info("Will fetch following datasets: %s", ", ".join(datasets)) for dataset in datasets: logging.info("Processing %s", dataset) temp_archive_path = _get_temp_file_name() @@ -92,10 +116,11 @@ if __name__ == "__main__": logging.info("Some exception occured %s", str(ex)) raise finally: - logging.info("Will remove downloaded file %s from filesystem if it exists", temp_archive_path) + logging.info( + "Will remove downloaded file %s from filesystem if it exists", + temp_archive_path, + ) if os.path.exists(temp_archive_path): os.remove(temp_archive_path) logging.info("Processing of %s finished", dataset) logging.info("Fetch finished, enjoy your tables!") - - diff --git a/tests/clickhouse-test b/tests/clickhouse-test index aec52981724..2de5b4e1333 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -77,7 +77,7 @@ def trim_for_log(s): return s lines = s.splitlines() if len(lines) > 10000: - separator = "-" * 40 + str(len(lines) - 10000) + " lines are hidden" + "-" * 40 + separator = "-" * 40 + str(len(lines) - 10000) + " lines are hidden" + "-" * 40 return "\n".join(lines[:5000] + [] + [separator] + [] + lines[-5000:]) else: return "\n".join(lines) @@ -95,7 +95,13 @@ class HTTPError(Exception): # Helpers to execute queries via HTTP interface. def clickhouse_execute_http( - base_args, query, timeout=30, settings=None, default_format=None, max_http_retries=5, retry_error_codes=False + base_args, + query, + timeout=30, + settings=None, + default_format=None, + max_http_retries=5, + retry_error_codes=False, ): if args.secure: client = http.client.HTTPSConnection( @@ -146,12 +152,36 @@ def clickhouse_execute_http( return data -def clickhouse_execute(base_args, query, timeout=30, settings=None, max_http_retries=5, retry_error_codes=False): - return clickhouse_execute_http(base_args, query, timeout, settings, max_http_retries=max_http_retries, retry_error_codes=retry_error_codes).strip() + +def clickhouse_execute( + base_args, + query, + timeout=30, + settings=None, + max_http_retries=5, + retry_error_codes=False, +): + return clickhouse_execute_http( + base_args, + query, + timeout, + settings, + max_http_retries=max_http_retries, + retry_error_codes=retry_error_codes, + ).strip() -def clickhouse_execute_json(base_args, query, timeout=60, settings=None, max_http_retries=5): - data = clickhouse_execute_http(base_args, query, timeout, settings, "JSONEachRow", max_http_retries=max_http_retries) +def clickhouse_execute_json( + base_args, query, timeout=60, settings=None, max_http_retries=5 +): + data = clickhouse_execute_http( + base_args, + query, + timeout, + settings, + "JSONEachRow", + max_http_retries=max_http_retries, + ) if not data: return None rows = [] @@ -648,7 +678,9 @@ class TestCase: clickhouse_execute( args, - "CREATE DATABASE IF NOT EXISTS " + database + get_db_engine(testcase_args, database), + "CREATE DATABASE IF NOT EXISTS " + + database + + get_db_engine(testcase_args, database), settings=get_create_database_settings(args, testcase_args), ) @@ -831,7 +863,8 @@ class TestCase: # TODO: remove checking "no-upgrade-check" after 23.1 elif args.upgrade_check and ( - "no-upgrade-check" in tags or "no-upgrade-check" in tags): + "no-upgrade-check" in tags or "no-upgrade-check" in tags + ): return FailureReason.NO_UPGRADE_CHECK elif tags and ("no-s3-storage" in tags) and args.s3_storage: @@ -1051,7 +1084,11 @@ class TestCase: @staticmethod def 
send_test_name_failed(suite: str, case: str): pid = os.getpid() - clickhouse_execute(args, f"SELECT 'Running test {suite}/{case} from pid={pid}'", retry_error_codes=True) + clickhouse_execute( + args, + f"SELECT 'Running test {suite}/{case} from pid={pid}'", + retry_error_codes=True, + ) def run_single_test( self, server_logs_level, client_options @@ -2220,6 +2257,7 @@ def find_binary(name): raise Exception(f"{name} was not found in PATH") + def find_clickhouse_command(binary, command): symlink = binary + "-" + command if os.access(symlink, os.X_OK): @@ -2228,6 +2266,7 @@ def find_clickhouse_command(binary, command): # To avoid requiring symlinks (in case you download binary from CI) return binary + " " + command + def get_additional_client_options(args): if args.client_option: return " ".join("--" + option for option in args.client_option) @@ -2569,7 +2608,9 @@ if __name__ == "__main__": "WARNING: --extract_from_config option is deprecated and will be removed the the future", file=sys.stderr, ) - args.extract_from_config = find_clickhouse_command(args.binary, "extract-from-config") + args.extract_from_config = find_clickhouse_command( + args.binary, "extract-from-config" + ) if args.configclient: args.client += " --config-file=" + args.configclient diff --git a/tests/integration/runner b/tests/integration/runner index c1b3178faa4..f658bac412b 100755 --- a/tests/integration/runner +++ b/tests/integration/runner @@ -243,11 +243,18 @@ if __name__ == "__main__": ) parser.add_argument( - "--no-random", action="store", dest="no_random", help="Disable tests order randomization" + "--no-random", + action="store", + dest="no_random", + help="Disable tests order randomization", ) parser.add_argument( - "--pre-pull", action="store_true", default=False, dest="pre_pull", help="Pull images for docker_compose before all other actions" + "--pre-pull", + action="store_true", + default=False, + dest="pre_pull", + help="Pull images for docker_compose before all other actions", ) parser.add_argument( @@ -306,7 +313,6 @@ if __name__ == "__main__": # if not args.no_random: # rand_args += f"--random-seed={os.getpid()}" - net = "" if args.network: net = "--net={}".format(args.network) @@ -416,8 +422,11 @@ if __name__ == "__main__": name=CONTAINER_NAME, ) - cmd = cmd_base + " " + args.command - cmd_pre_pull = cmd_base + " find /compose -name docker_compose_*.yml -exec docker-compose -f '{}' pull \;" + cmd = cmd_base + " " + args.command + cmd_pre_pull = ( + cmd_base + + " find /compose -name docker_compose_*.yml -exec docker-compose -f '{}' pull \;" + ) containers = subprocess.check_output( f"docker ps --all --quiet --filter name={CONTAINER_NAME} --format={{{{.ID}}}}", diff --git a/tests/queries/0_stateless/00386_long_in_pk.python b/tests/queries/0_stateless/00386_long_in_pk.python index e33bb254c60..c7b04102dc5 100644 --- a/tests/queries/0_stateless/00386_long_in_pk.python +++ b/tests/queries/0_stateless/00386_long_in_pk.python @@ -1,57 +1,72 @@ #!/usr/bin/env python3 + def gen_queries(): - create_template = 'create table tab_00386 (a Int8, b String, c Tuple(Int8), d Tuple(Tuple(Int8)), e Tuple(Int8, String), f Tuple(Tuple(Int8, String))) engine = MergeTree order by ({}) partition by {}' - drop_query = 'drop table if exists tab_00386' - values = ('1', "'a'", 'tuple(1)', 'tuple(tuple(1))', "(1, 'a')", "tuple((1, 'a'))") + create_template = "create table tab_00386 (a Int8, b String, c Tuple(Int8), d Tuple(Tuple(Int8)), e Tuple(Int8, String), f Tuple(Tuple(Int8, String))) engine = MergeTree order by ({}) partition 
by {}" + drop_query = "drop table if exists tab_00386" + values = ("1", "'a'", "tuple(1)", "tuple(tuple(1))", "(1, 'a')", "tuple((1, 'a'))") insert_query = "insert into tab_00386 values (1, 'a', tuple(1), tuple(tuple(1)), (1, 'a'), tuple((1, 'a')))" - columns = tuple('a b c d'.split()) - order_by_columns = tuple('a b c'.split()) - partition_by_columns = tuple(' tuple() a'.split()) + columns = tuple("a b c d".split()) + order_by_columns = tuple("a b c".split()) + partition_by_columns = tuple(" tuple() a".split()) for partition in partition_by_columns: for key_mask in range(1, 1 << len(order_by_columns)): - key = ','.join(order_by_columns[i] for i in range(len(order_by_columns)) if (1 << i) & key_mask != 0) + key = ",".join( + order_by_columns[i] + for i in range(len(order_by_columns)) + if (1 << i) & key_mask != 0 + ) create_query = create_template.format(key, partition) for q in (drop_query, create_query, insert_query): yield q for column, value in zip(columns, values): - yield 'select {} in {} from tab_00386'.format(column, value) - yield 'select {} in tuple({}) from tab_00386'.format(column, value) - yield 'select {} in (select {} from tab_00386) from tab_00386'.format(column, column) + yield "select {} in {} from tab_00386".format(column, value) + yield "select {} in tuple({}) from tab_00386".format(column, value) + yield "select {} in (select {} from tab_00386) from tab_00386".format( + column, column + ) for i in range(len(columns)): for j in range(i, len(columns)): - yield 'select ({}, {}) in tuple({}, {}) from tab_00386'.format(columns[i], columns[j], values[i], values[j]) - yield 'select ({}, {}) in (select {}, {} from tab_00386) from tab_00386'.format(columns[i], columns[j], columns[i], columns[j]) - yield 'select ({}, {}) in (select ({}, {}) from tab_00386) from tab_00386'.format(columns[i], columns[j], columns[i], columns[j]) + yield "select ({}, {}) in tuple({}, {}) from tab_00386".format( + columns[i], columns[j], values[i], values[j] + ) + yield "select ({}, {}) in (select {}, {} from tab_00386) from tab_00386".format( + columns[i], columns[j], columns[i], columns[j] + ) + yield "select ({}, {}) in (select ({}, {}) from tab_00386) from tab_00386".format( + columns[i], columns[j], columns[i], columns[j] + ) yield "select e in (1, 'a') from tab_00386" yield "select f in tuple((1, 'a')) from tab_00386" yield "select f in tuple(tuple((1, 'a'))) from tab_00386" - yield 'select e in (select a, b from tab_00386) from tab_00386' - yield 'select e in (select (a, b) from tab_00386) from tab_00386' - yield 'select f in (select tuple((a, b)) from tab_00386) from tab_00386' - yield 'select tuple(f) in (select tuple(tuple((a, b))) from tab_00386) from tab_00386' + yield "select e in (select a, b from tab_00386) from tab_00386" + yield "select e in (select (a, b) from tab_00386) from tab_00386" + yield "select f in (select tuple((a, b)) from tab_00386) from tab_00386" + yield "select tuple(f) in (select tuple(tuple((a, b))) from tab_00386) from tab_00386" + import requests import os + def main(): - url = os.environ['CLICKHOUSE_URL'] + url = os.environ["CLICKHOUSE_URL"] for q in gen_queries(): resp = requests.post(url, data=q) - if resp.status_code != 200 or resp.text.strip() not in ('1', ''): - print('Query:', q) - print('Code:', resp.status_code) + if resp.status_code != 200 or resp.text.strip() not in ("1", ""): + print("Query:", q) + print("Code:", resp.status_code) print(resp.text) break - requests.post(url, data='drop table tab_00386') + requests.post(url, data="drop table tab_00386") 
+ if __name__ == "__main__": main() - diff --git a/tests/queries/0_stateless/00411_long_accurate_number_comparison.python b/tests/queries/0_stateless/00411_long_accurate_number_comparison.python index 3c8a8f2ea25..e713e8814bc 100644 --- a/tests/queries/0_stateless/00411_long_accurate_number_comparison.python +++ b/tests/queries/0_stateless/00411_long_accurate_number_comparison.python @@ -2,8 +2,20 @@ import os, itertools, urllib.request, urllib.parse, urllib.error, urllib.request, urllib.error, urllib.parse, sys + def get_ch_answer(query): - return urllib.request.urlopen(os.environ.get('CLICKHOUSE_URL', 'http://localhost:' + os.environ.get('CLICKHOUSE_PORT_HTTP', '8123') ), data=query.encode()).read().decode() + return ( + urllib.request.urlopen( + os.environ.get( + "CLICKHOUSE_URL", + "http://localhost:" + os.environ.get("CLICKHOUSE_PORT_HTTP", "8123"), + ), + data=query.encode(), + ) + .read() + .decode() + ) + def check_answers(query, answer): ch_answer = get_ch_answer(query) @@ -13,36 +25,34 @@ def check_answers(query, answer): print("Fetched answer :", ch_answer) exit(-1) + def get_values(): values = [0, 1, -1] for bits in [8, 16, 32, 64]: values += [2**bits, 2**bits - 1] - values += [2**(bits-1) - 1, 2**(bits-1), 2**(bits-1) + 1] - values += [-2**(bits-1) - 1, -2**(bits-1), -2**(bits-1) + 1] + values += [2 ** (bits - 1) - 1, 2 ** (bits - 1), 2 ** (bits - 1) + 1] + values += [-(2 ** (bits - 1)) - 1, -(2 ** (bits - 1)), -(2 ** (bits - 1)) + 1] return values + def is_valid_integer(x): - return -2**63 <= x and x <= 2**64-1 + return -(2**63) <= x and x <= 2**64 - 1 -TEST_WITH_CASTING=True -GENERATE_TEST_FILES=False +TEST_WITH_CASTING = True +GENERATE_TEST_FILES = False TYPES = { - "UInt8" : { "bits" : 8, "sign" : False, "float" : False }, - "Int8" : { "bits" : 8, "sign" : True, "float" : False }, - - "UInt16": { "bits" : 16, "sign" : False, "float" : False }, - "Int16" : { "bits" : 16, "sign" : True, "float" : False }, - - "UInt32": { "bits" : 32, "sign" : False, "float" : False }, - "Int32" : { "bits" : 32, "sign" : True, "float" : False }, - - "UInt64": { "bits" : 64, "sign" : False, "float" : False }, - "Int64" : { "bits" : 64, "sign" : True, "float" : False } - - #"Float32" : { "bits" : 32, "sign" : True, "float" : True }, - #"Float64" : { "bits" : 64, "sign" : True, "float" : True } + "UInt8": {"bits": 8, "sign": False, "float": False}, + "Int8": {"bits": 8, "sign": True, "float": False}, + "UInt16": {"bits": 16, "sign": False, "float": False}, + "Int16": {"bits": 16, "sign": True, "float": False}, + "UInt32": {"bits": 32, "sign": False, "float": False}, + "Int32": {"bits": 32, "sign": True, "float": False}, + "UInt64": {"bits": 64, "sign": False, "float": False}, + "Int64": {"bits": 64, "sign": True, "float": False} + # "Float32" : { "bits" : 32, "sign" : True, "float" : True }, + # "Float64" : { "bits" : 64, "sign" : True, "float" : True } } @@ -55,14 +65,18 @@ def inside_range(value, type_name): return True if signed: - return -2**(bits-1) <= value and value <= 2**(bits-1) - 1 + return -(2 ** (bits - 1)) <= value and value <= 2 ** (bits - 1) - 1 else: return 0 <= value and value <= 2**bits - 1 def test_operators(v1, v2, v1_passed, v2_passed): - query_str = "{v1} = {v2}, {v1} != {v2}, {v1} < {v2}, {v1} <= {v2}, {v1} > {v2}, {v1} >= {v2},\t".format(v1=v1_passed, v2=v2_passed) - query_str += "{v1} = {v2}, {v1} != {v2}, {v1} < {v2}, {v1} <= {v2}, {v1} > {v2}, {v1} >= {v2} ".format(v1=v2_passed, v2=v1_passed) + query_str = "{v1} = {v2}, {v1} != {v2}, {v1} < {v2}, {v1} <= {v2}, {v1} > 
{v2}, {v1} >= {v2},\t".format( + v1=v1_passed, v2=v2_passed + ) + query_str += "{v1} = {v2}, {v1} != {v2}, {v1} < {v2}, {v1} <= {v2}, {v1} > {v2}, {v1} >= {v2} ".format( + v1=v2_passed, v2=v1_passed + ) answers = [v1 == v2, v1 != v2, v1 < v2, v1 <= v2, v1 > v2, v1 >= v2] answers += [v2 == v1, v2 != v1, v2 < v1, v2 <= v1, v2 > v1, v2 >= v1] @@ -74,6 +88,7 @@ def test_operators(v1, v2, v1_passed, v2_passed): VALUES = [x for x in get_values() if is_valid_integer(x)] + def test_pair(v1, v2): query = "SELECT {}, {}, ".format(v1, v2) answers = "{}\t{}\t".format(v1, v2) @@ -87,19 +102,58 @@ def test_pair(v1, v2): if inside_range(v1, t1): for t2 in TYPES.keys(): if inside_range(v2, t2): - q, a = test_operators(v1, v2, 'to{}({})'.format(t1, v1), 'to{}({})'.format(t2, v2)) - query += ', ' + q + q, a = test_operators( + v1, v2, "to{}({})".format(t1, v1), "to{}({})".format(t2, v2) + ) + query += ", " + q answers += "\t" + a check_answers(query, answers) return query, answers -VALUES_INT = [0, -1, 1, 2**64-1, 2**63, -2**63, 2**63-1, 2**51, 2**52, 2**53-1, 2**53, 2**53+1, 2**53+2, -2**53+1, -2**53, -2**53-1, -2**53-2, 2*52, -2**52] -VALUES_FLOAT = [float(x) for x in VALUES_INT + [-0.5, 0.5, -1.5, 1.5, 2**53, 2**51 - 0.5, 2**51 + 0.5, 2**60, -2**60, -2**63 - 10000, 2**63 + 10000]] +VALUES_INT = [ + 0, + -1, + 1, + 2**64 - 1, + 2**63, + -(2**63), + 2**63 - 1, + 2**51, + 2**52, + 2**53 - 1, + 2**53, + 2**53 + 1, + 2**53 + 2, + -(2**53) + 1, + -(2**53), + -(2**53) - 1, + -(2**53) - 2, + 2 * 52, + -(2**52), +] +VALUES_FLOAT = [ + float(x) + for x in VALUES_INT + + [ + -0.5, + 0.5, + -1.5, + 1.5, + 2**53, + 2**51 - 0.5, + 2**51 + 0.5, + 2**60, + -(2**60), + -(2**63) - 10000, + 2**63 + 10000, + ] +] + def test_float_pair(i, f): - f_str = ("%.9f" % f) + f_str = "%.9f" % f query = "SELECT '{}', '{}', ".format(i, f_str) answers = "{}\t{}\t".format(i, f_str) @@ -110,8 +164,8 @@ def test_float_pair(i, f): if TEST_WITH_CASTING: for t1 in TYPES.keys(): if inside_range(i, t1): - q, a = test_operators(i, f, 'to{}({})'.format(t1, i), f_str) - query += ', ' + q + q, a = test_operators(i, f, "to{}({})".format(t1, i), f_str) + query += ", " + q answers += "\t" + a check_answers(query, answers) @@ -120,22 +174,26 @@ def test_float_pair(i, f): def main(): if GENERATE_TEST_FILES: - base_name = '00411_accurate_number_comparison' - sql_file = open(base_name + '.sql', 'wt') - ref_file = open(base_name + '.reference', 'wt') + base_name = "00411_accurate_number_comparison" + sql_file = open(base_name + ".sql", "wt") + ref_file = open(base_name + ".reference", "wt") num_int_tests = len(list(itertools.combinations(VALUES, 2))) num_parts = 4 for part in range(0, num_parts): - if 'int' + str(part + 1) in sys.argv[1:]: - for (v1, v2) in itertools.islice(itertools.combinations(VALUES, 2), part * num_int_tests // num_parts, (part + 1) * num_int_tests // num_parts): + if "int" + str(part + 1) in sys.argv[1:]: + for (v1, v2) in itertools.islice( + itertools.combinations(VALUES, 2), + part * num_int_tests // num_parts, + (part + 1) * num_int_tests // num_parts, + ): q, a = test_pair(v1, v2) if GENERATE_TEST_FILES: sql_file.write(q + ";\n") ref_file.write(a + "\n") - if 'float' in sys.argv[1:]: + if "float" in sys.argv[1:]: for (i, f) in itertools.product(VALUES_INT, VALUES_FLOAT): q, a = test_float_pair(i, f) if GENERATE_TEST_FILES: diff --git a/tests/queries/0_stateless/00646_url_engine.python b/tests/queries/0_stateless/00646_url_engine.python index 5f3b7546dd5..0a26f8039c2 100644 --- a/tests/queries/0_stateless/00646_url_engine.python 
+++ b/tests/queries/0_stateless/00646_url_engine.python @@ -12,6 +12,7 @@ import subprocess from io import StringIO from http.server import BaseHTTPRequestHandler, HTTPServer + def is_ipv6(host): try: socket.inet_aton(host) @@ -19,6 +20,7 @@ def is_ipv6(host): except: return True + def get_local_port(host, ipv6): if ipv6: family = socket.AF_INET6 @@ -29,8 +31,9 @@ def get_local_port(host, ipv6): fd.bind((host, 0)) return fd.getsockname()[1] -CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', '127.0.0.1') -CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123') + +CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "127.0.0.1") +CLICKHOUSE_PORT_HTTP = os.environ.get("CLICKHOUSE_PORT_HTTP", "8123") ##################################################################################### # This test starts an HTTP server and serves data to clickhouse url-engine based table. @@ -39,27 +42,42 @@ CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123') ##################################################################################### # IP-address of this host accessible from the outside world. Get the first one -HTTP_SERVER_HOST = subprocess.check_output(['hostname', '-i']).decode('utf-8').strip().split()[0] +HTTP_SERVER_HOST = ( + subprocess.check_output(["hostname", "-i"]).decode("utf-8").strip().split()[0] +) IS_IPV6 = is_ipv6(HTTP_SERVER_HOST) HTTP_SERVER_PORT = get_local_port(HTTP_SERVER_HOST, IS_IPV6) # IP address and port of the HTTP server started from this script. HTTP_SERVER_ADDRESS = (HTTP_SERVER_HOST, HTTP_SERVER_PORT) if IS_IPV6: - HTTP_SERVER_URL_STR = 'http://' + f'[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}' + "/" + HTTP_SERVER_URL_STR = ( + "http://" + + f"[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}" + + "/" + ) else: - HTTP_SERVER_URL_STR = 'http://' + f'{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}' + "/" + HTTP_SERVER_URL_STR = ( + "http://" + f"{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}" + "/" + ) + +CSV_DATA = os.path.join( + tempfile._get_default_tempdir(), next(tempfile._get_candidate_names()) +) -CSV_DATA = os.path.join(tempfile._get_default_tempdir(), next(tempfile._get_candidate_names())) def get_ch_answer(query): host = CLICKHOUSE_HOST if IS_IPV6: - host = f'[{host}]' + host = f"[{host}]" - url = os.environ.get('CLICKHOUSE_URL', 'http://{host}:{port}'.format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP)) + url = os.environ.get( + "CLICKHOUSE_URL", + "http://{host}:{port}".format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP), + ) return urllib.request.urlopen(url, data=query.encode()).read().decode() + def check_answers(query, answer): ch_answer = get_ch_answer(query) if ch_answer.strip() != answer.strip(): @@ -68,18 +86,19 @@ def check_answers(query, answer): print("Fetched answer :", ch_answer, file=sys.stderr) raise Exception("Fail on query") + class CSVHTTPServer(BaseHTTPRequestHandler): def _set_headers(self): self.send_response(200) - self.send_header('Content-type', 'text/csv') + self.send_header("Content-type", "text/csv") self.end_headers() def do_GET(self): self._set_headers() - with open(CSV_DATA, 'r') as fl: - reader = csv.reader(fl, delimiter=',') + with open(CSV_DATA, "r") as fl: + reader = csv.reader(fl, delimiter=",") for row in reader: - self.wfile.write((', '.join(row) + '\n').encode()) + self.wfile.write((", ".join(row) + "\n").encode()) return def do_HEAD(self): @@ -87,33 +106,33 @@ class CSVHTTPServer(BaseHTTPRequestHandler): return def 
read_chunk(self): - msg = '' + msg = "" while True: sym = self.rfile.read(1) - if sym == '': + if sym == "": break - msg += sym.decode('utf-8') - if msg.endswith('\r\n'): + msg += sym.decode("utf-8") + if msg.endswith("\r\n"): break length = int(msg[:-2], 16) if length == 0: - return '' + return "" content = self.rfile.read(length) - self.rfile.read(2) # read sep \r\n - return content.decode('utf-8') + self.rfile.read(2) # read sep \r\n + return content.decode("utf-8") def do_POST(self): - data = '' + data = "" while True: chunk = self.read_chunk() if not chunk: break data += chunk with StringIO(data) as fl: - reader = csv.reader(fl, delimiter=',') - with open(CSV_DATA, 'a') as d: + reader = csv.reader(fl, delimiter=",") + with open(CSV_DATA, "a") as d: for row in reader: - d.write(','.join(row) + '\n') + d.write(",".join(row) + "\n") self._set_headers() self.wfile.write(b"ok") @@ -124,6 +143,7 @@ class CSVHTTPServer(BaseHTTPRequestHandler): class HTTPServerV6(HTTPServer): address_family = socket.AF_INET6 + def start_server(): if IS_IPV6: httpd = HTTPServerV6(HTTP_SERVER_ADDRESS, CSVHTTPServer) @@ -133,49 +153,76 @@ def start_server(): t = threading.Thread(target=httpd.serve_forever) return t, httpd + # test section -def test_select(table_name="", schema="str String,numuint UInt32,numint Int32,double Float64", requests=[], answers=[], test_data=""): - with open(CSV_DATA, 'w') as f: # clear file - f.write('') + +def test_select( + table_name="", + schema="str String,numuint UInt32,numint Int32,double Float64", + requests=[], + answers=[], + test_data="", +): + with open(CSV_DATA, "w") as f: # clear file + f.write("") if test_data: - with open(CSV_DATA, 'w') as f: + with open(CSV_DATA, "w") as f: f.write(test_data + "\n") if table_name: get_ch_answer("drop table if exists {}".format(table_name)) - get_ch_answer("create table {} ({}) engine=URL('{}', 'CSV')".format(table_name, schema, HTTP_SERVER_URL_STR)) + get_ch_answer( + "create table {} ({}) engine=URL('{}', 'CSV')".format( + table_name, schema, HTTP_SERVER_URL_STR + ) + ) for i in range(len(requests)): tbl = table_name if not tbl: - tbl = "url('{addr}', 'CSV', '{schema}')".format(addr=HTTP_SERVER_URL_STR, schema=schema) + tbl = "url('{addr}', 'CSV', '{schema}')".format( + addr=HTTP_SERVER_URL_STR, schema=schema + ) check_answers(requests[i].format(tbl=tbl), answers[i]) if table_name: get_ch_answer("drop table if exists {}".format(table_name)) -def test_insert(table_name="", schema="str String,numuint UInt32,numint Int32,double Float64", requests_insert=[], requests_select=[], answers=[]): - with open(CSV_DATA, 'w') as f: # flush test file - f.write('') +def test_insert( + table_name="", + schema="str String,numuint UInt32,numint Int32,double Float64", + requests_insert=[], + requests_select=[], + answers=[], +): + with open(CSV_DATA, "w") as f: # flush test file + f.write("") if table_name: get_ch_answer("drop table if exists {}".format(table_name)) - get_ch_answer("create table {} ({}) engine=URL('{}', 'CSV')".format(table_name, schema, HTTP_SERVER_URL_STR)) + get_ch_answer( + "create table {} ({}) engine=URL('{}', 'CSV')".format( + table_name, schema, HTTP_SERVER_URL_STR + ) + ) for req in requests_insert: tbl = table_name if not tbl: - tbl = "table function url('{addr}', 'CSV', '{schema}')".format(addr=HTTP_SERVER_URL_STR, schema=schema) + tbl = "table function url('{addr}', 'CSV', '{schema}')".format( + addr=HTTP_SERVER_URL_STR, schema=schema + ) get_ch_answer(req.format(tbl=tbl)) - for i in range(len(requests_select)): tbl = 
table_name if not tbl: - tbl = "url('{addr}', 'CSV', '{schema}')".format(addr=HTTP_SERVER_URL_STR, schema=schema) + tbl = "url('{addr}', 'CSV', '{schema}')".format( + addr=HTTP_SERVER_URL_STR, schema=schema + ) check_answers(requests_select[i].format(tbl=tbl), answers[i]) if table_name: @@ -185,9 +232,11 @@ def test_insert(table_name="", schema="str String,numuint UInt32,numint Int32,do def main(): test_data = "Hello,2,-2,7.7\nWorld,2,-5,8.8" select_only_requests = { - "select str,numuint,numint,double from {tbl}" : test_data.replace(',', '\t'), - "select numuint, count(*) from {tbl} group by numuint" : "2\t2", - "select str,numuint,numint,double from {tbl} limit 1": test_data.split("\n")[0].replace(',', '\t'), + "select str,numuint,numint,double from {tbl}": test_data.replace(",", "\t"), + "select numuint, count(*) from {tbl} group by numuint": "2\t2", + "select str,numuint,numint,double from {tbl} limit 1": test_data.split("\n")[ + 0 + ].replace(",", "\t"), } insert_requests = [ @@ -196,21 +245,41 @@ def main(): ] select_requests = { - "select distinct numuint from {tbl} order by numuint": '\n'.join([str(i) for i in range(11)]), - "select count(*) from {tbl}": '12', - 'select double, count(*) from {tbl} group by double order by double': "7.7\t2\n9.9\t10" + "select distinct numuint from {tbl} order by numuint": "\n".join( + [str(i) for i in range(11)] + ), + "select count(*) from {tbl}": "12", + "select double, count(*) from {tbl} group by double order by double": "7.7\t2\n9.9\t10", } t, httpd = start_server() t.start() # test table with url engine - test_select(table_name="test_table_select", requests=list(select_only_requests.keys()), answers=list(select_only_requests.values()), test_data=test_data) + test_select( + table_name="test_table_select", + requests=list(select_only_requests.keys()), + answers=list(select_only_requests.values()), + test_data=test_data, + ) # test table function url - test_select(requests=list(select_only_requests.keys()), answers=list(select_only_requests.values()), test_data=test_data) - #test insert into table with url engine - test_insert(table_name="test_table_insert", requests_insert=insert_requests, requests_select=list(select_requests.keys()), answers=list(select_requests.values())) - #test insert into table function url - test_insert(requests_insert=insert_requests, requests_select=list(select_requests.keys()), answers=list(select_requests.values())) + test_select( + requests=list(select_only_requests.keys()), + answers=list(select_only_requests.values()), + test_data=test_data, + ) + # test insert into table with url engine + test_insert( + table_name="test_table_insert", + requests_insert=insert_requests, + requests_select=list(select_requests.keys()), + answers=list(select_requests.values()), + ) + # test insert into table function url + test_insert( + requests_insert=insert_requests, + requests_select=list(select_requests.keys()), + answers=list(select_requests.values()), + ) httpd.shutdown() t.join() diff --git a/tests/queries/0_stateless/00990_hasToken.python b/tests/queries/0_stateless/00990_hasToken.python index 7d3775adc9d..e9bc514474a 100644 --- a/tests/queries/0_stateless/00990_hasToken.python +++ b/tests/queries/0_stateless/00990_hasToken.python @@ -12,35 +12,46 @@ HAYSTACKS = [ NEEDLE = "needle" -HAY_RE = re.compile(r'\bhay\b', re.IGNORECASE) -NEEDLE_RE = re.compile(r'\bneedle\b', re.IGNORECASE) +HAY_RE = re.compile(r"\bhay\b", re.IGNORECASE) +NEEDLE_RE = re.compile(r"\bneedle\b", re.IGNORECASE) + def replace_follow_case(replacement): def 
func(match): g = match.group() - if g.islower(): return replacement.lower() - if g.istitle(): return replacement.title() - if g.isupper(): return replacement.upper() + if g.islower(): + return replacement.lower() + if g.istitle(): + return replacement.title() + if g.isupper(): + return replacement.upper() return replacement + return func + def replace_separators(query, new_sep): - SEP_RE = re.compile('\\s+') + SEP_RE = re.compile("\\s+") result = SEP_RE.sub(new_sep, query) return result -def enlarge_haystack(query, times, separator=''): - return HAY_RE.sub(replace_follow_case(('hay' + separator) * times), query) + +def enlarge_haystack(query, times, separator=""): + return HAY_RE.sub(replace_follow_case(("hay" + separator) * times), query) + def small_needle(query): - return NEEDLE_RE.sub(replace_follow_case('n'), query) + return NEEDLE_RE.sub(replace_follow_case("n"), query) + def remove_needle(query): - return NEEDLE_RE.sub('', query) + return NEEDLE_RE.sub("", query) + def replace_needle(query, new_needle): return NEEDLE_RE.sub(new_needle, query) + # with str.lower, str.uppert, str.title and such def transform_needle(query, string_transformation_func): def replace_with_transformation(match): @@ -49,19 +60,21 @@ def transform_needle(query, string_transformation_func): return NEEDLE_RE.sub(replace_with_transformation, query) -def create_cases(case_sensitive_func, case_insensitive_func, table_row_template, table_query_template, const_query_template): + +def create_cases( + case_sensitive_func, + case_insensitive_func, + table_row_template, + table_query_template, + const_query_template, +): const_queries = [] table_rows = [] table_queries = set() def add_case(func, haystack, needle, match): match = int(match) - args = dict( - func = func, - haystack = haystack, - needle = needle, - match = match - ) + args = dict(func=func, haystack=haystack, needle=needle, match=match) const_queries.append(const_query_template.substitute(args)) table_queries.add(table_query_template.substitute(args)) table_rows.append(table_row_template.substitute(args)) @@ -69,14 +82,28 @@ def create_cases(case_sensitive_func, case_insensitive_func, table_row_template, def add_case_sensitive(haystack, needle, match): add_case(case_sensitive_func, haystack, needle, match) if match: - add_case(case_sensitive_func, transform_needle(haystack, str.swapcase), transform_needle(needle, str.swapcase), match) + add_case( + case_sensitive_func, + transform_needle(haystack, str.swapcase), + transform_needle(needle, str.swapcase), + match, + ) def add_case_insensitive(haystack, needle, match): add_case(case_insensitive_func, haystack, needle, match) if match: - add_case(case_insensitive_func, transform_needle(haystack, str.swapcase), needle, match) - add_case(case_insensitive_func, haystack, transform_needle(needle, str.swapcase), match) - + add_case( + case_insensitive_func, + transform_needle(haystack, str.swapcase), + needle, + match, + ) + add_case( + case_insensitive_func, + haystack, + transform_needle(needle, str.swapcase), + match, + ) # Negative cases add_case_sensitive(remove_needle(HAYSTACKS[0]), NEEDLE, False) @@ -85,7 +112,7 @@ def create_cases(case_sensitive_func, case_insensitive_func, table_row_template, for haystack in HAYSTACKS: add_case_sensitive(transform_needle(haystack, str.swapcase), NEEDLE, False) - sep = '' + sep = "" h = replace_separators(haystack, sep) add_case_sensitive(h, NEEDLE, False) @@ -102,8 +129,7 @@ def create_cases(case_sensitive_func, case_insensitive_func, table_row_template, 
add_case_sensitive(haystack, NEEDLE, True) add_case_insensitive(haystack, NEEDLE, True) - - for sep in list(''' ,'''): + for sep in list(""" ,"""): h = replace_separators(haystack, sep) add_case_sensitive(h, NEEDLE, True) add_case_sensitive(small_needle(h), small_needle(NEEDLE), True) @@ -114,32 +140,43 @@ def create_cases(case_sensitive_func, case_insensitive_func, table_row_template, add_case_insensitive(enlarge_haystack(h, 200, sep), NEEDLE, True) # case insesitivity works only on ASCII strings - add_case_sensitive(replace_needle(h, 'иголка'), replace_needle(NEEDLE, 'иголка'), True) - add_case_sensitive(replace_needle(h, '指针'), replace_needle(NEEDLE, '指针'), True) + add_case_sensitive( + replace_needle(h, "иголка"), replace_needle(NEEDLE, "иголка"), True + ) + add_case_sensitive( + replace_needle(h, "指针"), replace_needle(NEEDLE, "指针"), True + ) - for sep in list('''~!@$%^&*()-=+|]}[{";:/?.><\t''') + [r'\\\\']: + for sep in list("""~!@$%^&*()-=+|]}[{";:/?.><\t""") + [r"\\\\"]: h = replace_separators(HAYSTACKS[0], sep) add_case(case_sensitive_func, h, NEEDLE, True) return table_rows, table_queries, const_queries -def main(): +def main(): def query(x): print(x) - CONST_QUERY = Template("""SELECT ${func}('${haystack}', '${needle}'), ' expecting ', ${match};""") - TABLE_QUERY = Template("""WITH '${needle}' as n + CONST_QUERY = Template( + """SELECT ${func}('${haystack}', '${needle}'), ' expecting ', ${match};""" + ) + TABLE_QUERY = Template( + """WITH '${needle}' as n SELECT haystack, needle, ${func}(haystack, n) as result FROM ht - WHERE func = '${func}' AND needle = n AND result != match;""") + WHERE func = '${func}' AND needle = n AND result != match;""" + ) TABLE_ROW = Template("""('${haystack}', '${needle}', ${match}, '${func}')""") - rows, table_queries, const_queries = create_cases('hasToken', 'hasTokenCaseInsensitive', TABLE_ROW, TABLE_QUERY, CONST_QUERY) + rows, table_queries, const_queries = create_cases( + "hasToken", "hasTokenCaseInsensitive", TABLE_ROW, TABLE_QUERY, CONST_QUERY + ) for q in const_queries: query(q) - query("""DROP TABLE IF EXISTS ht; + query( + """DROP TABLE IF EXISTS ht; CREATE TABLE IF NOT EXISTS ht ( @@ -150,11 +187,15 @@ def main(): ) ENGINE MergeTree() ORDER BY haystack; -INSERT INTO ht VALUES {values};""".format(values=", ".join(rows))) +INSERT INTO ht VALUES {values};""".format( + values=", ".join(rows) + ) + ) for q in sorted(table_queries): query(q) query("""DROP TABLE ht""") -if __name__ == '__main__': + +if __name__ == "__main__": main() diff --git a/tests/queries/0_stateless/00991_live_view_watch_event_live.python b/tests/queries/0_stateless/00991_live_view_watch_event_live.python index 901d388ec01..9b7a3300c15 100644 --- a/tests/queries/0_stateless/00991_live_view_watch_event_live.python +++ b/tests/queries/0_stateless/00991_live_view_watch_event_live.python @@ -8,28 +8,32 @@ import sys import signal -CLICKHOUSE_CLIENT = os.environ.get('CLICKHOUSE_CLIENT') -CLICKHOUSE_CURL = os.environ.get('CLICKHOUSE_CURL') -CLICKHOUSE_URL = os.environ.get('CLICKHOUSE_URL') +CLICKHOUSE_CLIENT = os.environ.get("CLICKHOUSE_CLIENT") +CLICKHOUSE_CURL = os.environ.get("CLICKHOUSE_CURL") +CLICKHOUSE_URL = os.environ.get("CLICKHOUSE_URL") def send_query(query): cmd = list(CLICKHOUSE_CLIENT.split()) - cmd += ['--query', query] + cmd += ["--query", query] # print(cmd) - return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout + return subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ).stdout def 
send_query_in_process_group(query): cmd = list(CLICKHOUSE_CLIENT.split()) - cmd += ['--query', query] + cmd += ["--query", query] # print(cmd) - return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, preexec_fn=os.setsid) + return subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, preexec_fn=os.setsid + ) def read_lines_and_push_to_queue(pipe, queue): try: - for line in iter(pipe.readline, ''): + for line in iter(pipe.readline, ""): line = line.strip() print(line) sys.stdout.flush() @@ -41,41 +45,44 @@ def read_lines_and_push_to_queue(pipe, queue): def test(): - send_query('DROP TABLE IF EXISTS test.lv').read() - send_query('DROP TABLE IF EXISTS test.mt').read() - send_query('CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()').read() - send_query('CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt').read() + send_query("DROP TABLE IF EXISTS test.lv").read() + send_query("DROP TABLE IF EXISTS test.mt").read() + send_query( + "CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()" + ).read() + send_query("CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt").read() q = queue.Queue() - p = send_query_in_process_group('WATCH test.lv') + p = send_query_in_process_group("WATCH test.lv") thread = threading.Thread(target=read_lines_and_push_to_queue, args=(p.stdout, q)) thread.start() line = q.get() print(line) - assert (line == '0\t1') + assert line == "0\t1" - send_query('INSERT INTO test.mt VALUES (1),(2),(3)').read() + send_query("INSERT INTO test.mt VALUES (1),(2),(3)").read() line = q.get() print(line) - assert (line == '6\t2') + assert line == "6\t2" - send_query('INSERT INTO test.mt VALUES (4),(5),(6)').read() + send_query("INSERT INTO test.mt VALUES (4),(5),(6)").read() line = q.get() print(line) - assert (line == '21\t3') + assert line == "21\t3" # Send Ctrl+C to client. os.killpg(os.getpgid(p.pid), signal.SIGINT) # This insert shouldn't affect lv. 
- send_query('INSERT INTO test.mt VALUES (7),(8),(9)').read() + send_query("INSERT INTO test.mt VALUES (7),(8),(9)").read() line = q.get() print(line) - assert (line is None) + assert line is None - send_query('DROP TABLE if exists test.lv').read() - send_query('DROP TABLE if exists test.lv').read() + send_query("DROP TABLE if exists test.lv").read() + send_query("DROP TABLE if exists test.lv").read() thread.join() + test() diff --git a/tests/queries/0_stateless/00991_live_view_watch_http.python b/tests/queries/0_stateless/00991_live_view_watch_http.python index d5a1e6e8ed9..72c07b27d82 100755 --- a/tests/queries/0_stateless/00991_live_view_watch_http.python +++ b/tests/queries/0_stateless/00991_live_view_watch_http.python @@ -7,26 +7,30 @@ import os import sys -CLICKHOUSE_CLIENT = os.environ.get('CLICKHOUSE_CLIENT') -CLICKHOUSE_CURL = os.environ.get('CLICKHOUSE_CURL') -CLICKHOUSE_URL = os.environ.get('CLICKHOUSE_URL') +CLICKHOUSE_CLIENT = os.environ.get("CLICKHOUSE_CLIENT") +CLICKHOUSE_CURL = os.environ.get("CLICKHOUSE_CURL") +CLICKHOUSE_URL = os.environ.get("CLICKHOUSE_URL") def send_query(query): cmd = list(CLICKHOUSE_CLIENT.split()) - cmd += ['--query', query] + cmd += ["--query", query] # print(cmd) - return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout + return subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ).stdout def send_http_query(query): - cmd = list(CLICKHOUSE_CURL.split()) # list(['curl', '-sSN', '--max-time', '10']) - cmd += ['-sSN', CLICKHOUSE_URL, '-d', query] - return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout + cmd = list(CLICKHOUSE_CURL.split()) # list(['curl', '-sSN', '--max-time', '10']) + cmd += ["-sSN", CLICKHOUSE_URL, "-d", query] + return subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ).stdout def read_lines_and_push_to_queue(pipe, queue): - for line in iter(pipe.readline, ''): + for line in iter(pipe.readline, ""): line = line.strip() print(line) sys.stdout.flush() @@ -36,28 +40,31 @@ def read_lines_and_push_to_queue(pipe, queue): def test(): - send_query('DROP TABLE IF EXISTS test.lv').read() - send_query('DROP TABLE IF EXISTS test.mt').read() - send_query('CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()').read() - send_query('CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt').read() + send_query("DROP TABLE IF EXISTS test.lv").read() + send_query("DROP TABLE IF EXISTS test.mt").read() + send_query( + "CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()" + ).read() + send_query("CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt").read() q = queue.Queue() - pipe = send_http_query('WATCH test.lv') + pipe = send_http_query("WATCH test.lv") thread = threading.Thread(target=read_lines_and_push_to_queue, args=(pipe, q)) thread.start() line = q.get() print(line) - assert (line == '0\t1') + assert line == "0\t1" - send_query('INSERT INTO test.mt VALUES (1),(2),(3)').read() + send_query("INSERT INTO test.mt VALUES (1),(2),(3)").read() line = q.get() print(line) - assert (line == '6\t2') + assert line == "6\t2" - send_query('DROP TABLE if exists test.lv').read() - send_query('DROP TABLE if exists test.lv').read() + send_query("DROP TABLE if exists test.lv").read() + send_query("DROP TABLE if exists test.lv").read() thread.join() + test() diff --git a/tests/queries/0_stateless/01558_ttest_scipy.python b/tests/queries/0_stateless/01558_ttest_scipy.python index 4d913d4292f..75e1c2701b2 100644 --- 
a/tests/queries/0_stateless/01558_ttest_scipy.python +++ b/tests/queries/0_stateless/01558_ttest_scipy.python @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python3 import os import sys from scipy import stats @@ -6,70 +6,86 @@ import pandas as pd import numpy as np CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) from pure_http_client import ClickHouseClient + def test_and_check(name, a, b, t_stat, p_value, precision=1e-2): client = ClickHouseClient() client.query("DROP TABLE IF EXISTS ttest;") - client.query("CREATE TABLE ttest (left Float64, right UInt8) ENGINE = Memory;"); - client.query("INSERT INTO ttest VALUES {};".format(", ".join(['({},{})'.format(i, 0) for i in a]))) - client.query("INSERT INTO ttest VALUES {};".format(", ".join(['({},{})'.format(j, 1) for j in b]))) + client.query("CREATE TABLE ttest (left Float64, right UInt8) ENGINE = Memory;") + client.query( + "INSERT INTO ttest VALUES {};".format( + ", ".join(["({},{})".format(i, 0) for i in a]) + ) + ) + client.query( + "INSERT INTO ttest VALUES {};".format( + ", ".join(["({},{})".format(j, 1) for j in b]) + ) + ) real = client.query_return_df( - "SELECT roundBankers({}(left, right).1, 16) as t_stat, ".format(name) + - "roundBankers({}(left, right).2, 16) as p_value ".format(name) + - "FROM ttest FORMAT TabSeparatedWithNames;") - real_t_stat = real['t_stat'][0] - real_p_value = real['p_value'][0] - assert(abs(real_t_stat - np.float64(t_stat)) < precision), "clickhouse_t_stat {}, scipy_t_stat {}".format(real_t_stat, t_stat) - assert(abs(real_p_value - np.float64(p_value)) < precision), "clickhouse_p_value {}, scipy_p_value {}".format(real_p_value, p_value) + "SELECT roundBankers({}(left, right).1, 16) as t_stat, ".format(name) + + "roundBankers({}(left, right).2, 16) as p_value ".format(name) + + "FROM ttest FORMAT TabSeparatedWithNames;" + ) + real_t_stat = real["t_stat"][0] + real_p_value = real["p_value"][0] + assert ( + abs(real_t_stat - np.float64(t_stat)) < precision + ), "clickhouse_t_stat {}, scipy_t_stat {}".format(real_t_stat, t_stat) + assert ( + abs(real_p_value - np.float64(p_value)) < precision + ), "clickhouse_p_value {}, scipy_p_value {}".format(real_p_value, p_value) client.query("DROP TABLE IF EXISTS ttest;") def test_student(): - rvs1 = np.round(stats.norm.rvs(loc=1, scale=5,size=500), 2) - rvs2 = np.round(stats.norm.rvs(loc=10, scale=5,size=500), 2) - s, p = stats.ttest_ind(rvs1, rvs2, equal_var = True) + rvs1 = np.round(stats.norm.rvs(loc=1, scale=5, size=500), 2) + rvs2 = np.round(stats.norm.rvs(loc=10, scale=5, size=500), 2) + s, p = stats.ttest_ind(rvs1, rvs2, equal_var=True) test_and_check("studentTTest", rvs1, rvs2, s, p) - rvs1 = np.round(stats.norm.rvs(loc=0, scale=5,size=500), 2) - rvs2 = np.round(stats.norm.rvs(loc=0, scale=5,size=500), 2) - s, p = stats.ttest_ind(rvs1, rvs2, equal_var = True) + rvs1 = np.round(stats.norm.rvs(loc=0, scale=5, size=500), 2) + rvs2 = np.round(stats.norm.rvs(loc=0, scale=5, size=500), 2) + s, p = stats.ttest_ind(rvs1, rvs2, equal_var=True) test_and_check("studentTTest", rvs1, rvs2, s, p) - rvs1 = np.round(stats.norm.rvs(loc=2, scale=10,size=512), 2) - rvs2 = np.round(stats.norm.rvs(loc=5, scale=20,size=1024), 2) - s, p = stats.ttest_ind(rvs1, rvs2, equal_var = True) + rvs1 = np.round(stats.norm.rvs(loc=2, scale=10, size=512), 2) + rvs2 = np.round(stats.norm.rvs(loc=5, scale=20, size=1024), 2) + s, p = stats.ttest_ind(rvs1, rvs2, equal_var=True) 
test_and_check("studentTTest", rvs1, rvs2, s, p) - rvs1 = np.round(stats.norm.rvs(loc=0, scale=10,size=1024), 2) - rvs2 = np.round(stats.norm.rvs(loc=0, scale=10,size=512), 2) - s, p = stats.ttest_ind(rvs1, rvs2, equal_var = True) + rvs1 = np.round(stats.norm.rvs(loc=0, scale=10, size=1024), 2) + rvs2 = np.round(stats.norm.rvs(loc=0, scale=10, size=512), 2) + s, p = stats.ttest_ind(rvs1, rvs2, equal_var=True) test_and_check("studentTTest", rvs1, rvs2, s, p) + def test_welch(): - rvs1 = np.round(stats.norm.rvs(loc=1, scale=15,size=500), 2) - rvs2 = np.round(stats.norm.rvs(loc=10, scale=5,size=500), 2) - s, p = stats.ttest_ind(rvs1, rvs2, equal_var = False) + rvs1 = np.round(stats.norm.rvs(loc=1, scale=15, size=500), 2) + rvs2 = np.round(stats.norm.rvs(loc=10, scale=5, size=500), 2) + s, p = stats.ttest_ind(rvs1, rvs2, equal_var=False) test_and_check("welchTTest", rvs1, rvs2, s, p) - rvs1 = np.round(stats.norm.rvs(loc=0, scale=7,size=500), 2) - rvs2 = np.round(stats.norm.rvs(loc=0, scale=3,size=500), 2) - s, p = stats.ttest_ind(rvs1, rvs2, equal_var = False) + rvs1 = np.round(stats.norm.rvs(loc=0, scale=7, size=500), 2) + rvs2 = np.round(stats.norm.rvs(loc=0, scale=3, size=500), 2) + s, p = stats.ttest_ind(rvs1, rvs2, equal_var=False) test_and_check("welchTTest", rvs1, rvs2, s, p) - rvs1 = np.round(stats.norm.rvs(loc=0, scale=10,size=1024), 2) - rvs2 = np.round(stats.norm.rvs(loc=5, scale=1,size=512), 2) - s, p = stats.ttest_ind(rvs1, rvs2, equal_var = False) + rvs1 = np.round(stats.norm.rvs(loc=0, scale=10, size=1024), 2) + rvs2 = np.round(stats.norm.rvs(loc=5, scale=1, size=512), 2) + s, p = stats.ttest_ind(rvs1, rvs2, equal_var=False) test_and_check("welchTTest", rvs1, rvs2, s, p) - rvs1 = np.round(stats.norm.rvs(loc=5, scale=10,size=512), 2) - rvs2 = np.round(stats.norm.rvs(loc=5, scale=10,size=1024), 2) - s, p = stats.ttest_ind(rvs1, rvs2, equal_var = False) + rvs1 = np.round(stats.norm.rvs(loc=5, scale=10, size=512), 2) + rvs2 = np.round(stats.norm.rvs(loc=5, scale=10, size=1024), 2) + s, p = stats.ttest_ind(rvs1, rvs2, equal_var=False) test_and_check("welchTTest", rvs1, rvs2, s, p) + if __name__ == "__main__": test_student() test_welch() - print("Ok.") \ No newline at end of file + print("Ok.") diff --git a/tests/queries/0_stateless/01561_mann_whitney_scipy.python b/tests/queries/0_stateless/01561_mann_whitney_scipy.python index 7958e8bbaf1..4713120287d 100644 --- a/tests/queries/0_stateless/01561_mann_whitney_scipy.python +++ b/tests/queries/0_stateless/01561_mann_whitney_scipy.python @@ -6,7 +6,7 @@ import pandas as pd import numpy as np CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) from pure_http_client import ClickHouseClient @@ -14,40 +14,51 @@ from pure_http_client import ClickHouseClient def test_and_check(name, a, b, t_stat, p_value): client = ClickHouseClient() client.query("DROP TABLE IF EXISTS mann_whitney;") - client.query("CREATE TABLE mann_whitney (left Float64, right UInt8) ENGINE = Memory;"); - client.query("INSERT INTO mann_whitney VALUES {};".format(", ".join(['({},{}), ({},{})'.format(i, 0, j, 1) for i,j in zip(a, b)]))) + client.query( + "CREATE TABLE mann_whitney (left Float64, right UInt8) ENGINE = Memory;" + ) + client.query( + "INSERT INTO mann_whitney VALUES {};".format( + ", ".join(["({},{}), ({},{})".format(i, 0, j, 1) for i, j in zip(a, b)]) + ) + ) real = client.query_return_df( - "SELECT roundBankers({}(left, right).1, 16) as t_stat, 
".format(name) + - "roundBankers({}(left, right).2, 16) as p_value ".format(name) + - "FROM mann_whitney FORMAT TabSeparatedWithNames;") - real_t_stat = real['t_stat'][0] - real_p_value = real['p_value'][0] - assert(abs(real_t_stat - np.float64(t_stat) < 1e-2)), "clickhouse_t_stat {}, scipy_t_stat {}".format(real_t_stat, t_stat) - assert(abs(real_p_value - np.float64(p_value)) < 1e-2), "clickhouse_p_value {}, scipy_p_value {}".format(real_p_value, p_value) + "SELECT roundBankers({}(left, right).1, 16) as t_stat, ".format(name) + + "roundBankers({}(left, right).2, 16) as p_value ".format(name) + + "FROM mann_whitney FORMAT TabSeparatedWithNames;" + ) + real_t_stat = real["t_stat"][0] + real_p_value = real["p_value"][0] + assert abs( + real_t_stat - np.float64(t_stat) < 1e-2 + ), "clickhouse_t_stat {}, scipy_t_stat {}".format(real_t_stat, t_stat) + assert ( + abs(real_p_value - np.float64(p_value)) < 1e-2 + ), "clickhouse_p_value {}, scipy_p_value {}".format(real_p_value, p_value) client.query("DROP TABLE IF EXISTS mann_whitney;") def test_mann_whitney(): - rvs1 = np.round(stats.norm.rvs(loc=1, scale=5,size=500), 5) - rvs2 = np.round(stats.expon.rvs(scale=0.2,size=500), 5) - s, p = stats.mannwhitneyu(rvs1, rvs2, alternative='two-sided') + rvs1 = np.round(stats.norm.rvs(loc=1, scale=5, size=500), 5) + rvs2 = np.round(stats.expon.rvs(scale=0.2, size=500), 5) + s, p = stats.mannwhitneyu(rvs1, rvs2, alternative="two-sided") test_and_check("mannWhitneyUTest", rvs1, rvs2, s, p) test_and_check("mannWhitneyUTest('two-sided')", rvs1, rvs2, s, p) equal = np.round(stats.cauchy.rvs(scale=5, size=500), 5) - s, p = stats.mannwhitneyu(equal, equal, alternative='two-sided') + s, p = stats.mannwhitneyu(equal, equal, alternative="two-sided") test_and_check("mannWhitneyUTest('two-sided')", equal, equal, s, p) - s, p = stats.mannwhitneyu(equal, equal, alternative='less', use_continuity=False) + s, p = stats.mannwhitneyu(equal, equal, alternative="less", use_continuity=False) test_and_check("mannWhitneyUTest('less', 0)", equal, equal, s, p) - - rvs1 = np.round(stats.cauchy.rvs(scale=10,size=65536), 5) - rvs2 = np.round(stats.norm.rvs(loc=0, scale=10,size=65536), 5) - s, p = stats.mannwhitneyu(rvs1, rvs2, alternative='greater') + rvs1 = np.round(stats.cauchy.rvs(scale=10, size=65536), 5) + rvs2 = np.round(stats.norm.rvs(loc=0, scale=10, size=65536), 5) + s, p = stats.mannwhitneyu(rvs1, rvs2, alternative="greater") test_and_check("mannWhitneyUTest('greater')", rvs1, rvs2, s, p) + if __name__ == "__main__": test_mann_whitney() - print("Ok.") \ No newline at end of file + print("Ok.") diff --git a/tests/queries/0_stateless/01626_cnf_fuzz_long.python b/tests/queries/0_stateless/01626_cnf_fuzz_long.python index 10c12d14182..de9e4a21dbb 100644 --- a/tests/queries/0_stateless/01626_cnf_fuzz_long.python +++ b/tests/queries/0_stateless/01626_cnf_fuzz_long.python @@ -4,14 +4,18 @@ from random import randint, choices import sys CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) from pure_http_client import ClickHouseClient client = ClickHouseClient() N = 10 -create_query = "CREATE TABLE t_cnf_fuzz(" + ", ".join([f"c{i} UInt8" for i in range(N)]) + ") ENGINE = Memory" +create_query = ( + "CREATE TABLE t_cnf_fuzz(" + + ", ".join([f"c{i} UInt8" for i in range(N)]) + + ") ENGINE = Memory" +) client.query("DROP TABLE IF EXISTS t_cnf_fuzz") client.query(create_query) @@ -35,6 +39,7 @@ client.query(insert_query) MAX_CLAUSES 
= 10 MAX_ATOMS = 5 + def generate_dnf(): clauses = [] num_clauses = randint(1, MAX_CLAUSES) @@ -42,12 +47,17 @@ def generate_dnf(): num_atoms = randint(1, MAX_ATOMS) atom_ids = choices(range(N), k=num_atoms) negates = choices([0, 1], k=num_atoms) - atoms = [f"(NOT c{i})" if neg else f"c{i}" for (i, neg) in zip(atom_ids, negates)] + atoms = [ + f"(NOT c{i})" if neg else f"c{i}" for (i, neg) in zip(atom_ids, negates) + ] clauses.append("(" + " AND ".join(atoms) + ")") return " OR ".join(clauses) -select_query = "SELECT count() FROM t_cnf_fuzz WHERE {} SETTINGS convert_query_to_cnf = {}" + +select_query = ( + "SELECT count() FROM t_cnf_fuzz WHERE {} SETTINGS convert_query_to_cnf = {}" +) fail_report = """ Failed query: '{}'. diff --git a/tests/queries/0_stateless/01654_test_writer_block_sequence.python b/tests/queries/0_stateless/01654_test_writer_block_sequence.python index e80cc273076..bc4e3da9ed5 100644 --- a/tests/queries/0_stateless/01654_test_writer_block_sequence.python +++ b/tests/queries/0_stateless/01654_test_writer_block_sequence.python @@ -5,15 +5,20 @@ import random import string CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) from pure_http_client import ClickHouseClient + def get_random_string(length): - return ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(length)) + return "".join( + random.choice(string.ascii_uppercase + string.digits) for _ in range(length) + ) + client = ClickHouseClient() + def insert_block(table_name, block_granularity_rows, block_rows): global client block_data = [] @@ -25,9 +30,12 @@ def insert_block(table_name, block_granularity_rows, block_rows): values_row = ", ".join("(1, '" + row + "')" for row in block_data) client.query("INSERT INTO {} VALUES {}".format(table_name, values_row)) + try: client.query("DROP TABLE IF EXISTS t") - client.query("CREATE TABLE t (v UInt8, data String) ENGINE = MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0") + client.query( + "CREATE TABLE t (v UInt8, data String) ENGINE = MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0" + ) client.query("SYSTEM STOP MERGES t") @@ -53,6 +61,10 @@ try: client.query("SYSTEM START MERGES t") client.query("OPTIMIZE TABLE t FINAL") - print(client.query_return_df("SELECT COUNT() as C FROM t FORMAT TabSeparatedWithNames")['C'][0]) + print( + client.query_return_df( + "SELECT COUNT() as C FROM t FORMAT TabSeparatedWithNames" + )["C"][0] + ) finally: client.query("DROP TABLE IF EXISTS t") diff --git a/tests/queries/0_stateless/01854_HTTP_dict_decompression.python b/tests/queries/0_stateless/01854_HTTP_dict_decompression.python index 4f6878665aa..7d98a24e83e 100644 --- a/tests/queries/0_stateless/01854_HTTP_dict_decompression.python +++ b/tests/queries/0_stateless/01854_HTTP_dict_decompression.python @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -from http.server import SimpleHTTPRequestHandler,HTTPServer +from http.server import SimpleHTTPRequestHandler, HTTPServer import socket import csv import sys @@ -21,6 +21,7 @@ def is_ipv6(host): except: return True + def get_local_port(host, ipv6): if ipv6: family = socket.AF_INET6 @@ -31,8 +32,9 @@ def get_local_port(host, ipv6): fd.bind((host, 0)) return fd.getsockname()[1] -CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', 'localhost') -CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123') + +CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "localhost") 
+CLICKHOUSE_PORT_HTTP = os.environ.get("CLICKHOUSE_PORT_HTTP", "8123") ##################################################################################### # This test starts an HTTP server and serves data to clickhouse url-engine based table. @@ -42,16 +44,24 @@ CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123') ##################################################################################### # IP-address of this host accessible from the outside world. Get the first one -HTTP_SERVER_HOST = subprocess.check_output(['hostname', '-i']).decode('utf-8').strip().split()[0] +HTTP_SERVER_HOST = ( + subprocess.check_output(["hostname", "-i"]).decode("utf-8").strip().split()[0] +) IS_IPV6 = is_ipv6(HTTP_SERVER_HOST) HTTP_SERVER_PORT = get_local_port(HTTP_SERVER_HOST, IS_IPV6) # IP address and port of the HTTP server started from this script. HTTP_SERVER_ADDRESS = (HTTP_SERVER_HOST, HTTP_SERVER_PORT) if IS_IPV6: - HTTP_SERVER_URL_STR = 'http://' + f'[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}' + "/" + HTTP_SERVER_URL_STR = ( + "http://" + + f"[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}" + + "/" + ) else: - HTTP_SERVER_URL_STR = 'http://' + f'{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}' + "/" + HTTP_SERVER_URL_STR = ( + "http://" + f"{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}" + "/" + ) # Because we need to check the content of file.csv we can create this content and avoid reading csv CSV_DATA = "Hello, 1\nWorld, 2\nThis, 152\nis, 9283\ntesting, 2313213\ndata, 555\n" @@ -59,19 +69,24 @@ CSV_DATA = "Hello, 1\nWorld, 2\nThis, 152\nis, 9283\ntesting, 2313213\ndata, 555 # Choose compression method # (Will change during test, need to check standard data sending, to make sure that nothing broke) -COMPRESS_METHOD = 'none' -ADDING_ENDING = '' -ENDINGS = ['.gz', '.xz'] +COMPRESS_METHOD = "none" +ADDING_ENDING = "" +ENDINGS = [".gz", ".xz"] SEND_ENCODING = True + def get_ch_answer(query): host = CLICKHOUSE_HOST if IS_IPV6: - host = f'[{host}]' + host = f"[{host}]" - url = os.environ.get('CLICKHOUSE_URL', 'http://{host}:{port}'.format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP)) + url = os.environ.get( + "CLICKHOUSE_URL", + "http://{host}:{port}".format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP), + ) return urllib.request.urlopen(url, data=query.encode()).read().decode() + def check_answers(query, answer): ch_answer = get_ch_answer(query) if ch_answer.strip() != answer.strip(): @@ -80,18 +95,19 @@ def check_answers(query, answer): print("Fetched answer :", ch_answer, file=sys.stderr) raise Exception("Fail on query") + # Server with head method which is useful for debuging by hands class HttpProcessor(SimpleHTTPRequestHandler): def _set_headers(self): self.send_response(200) if SEND_ENCODING: - self.send_header('Content-Encoding', COMPRESS_METHOD) - if COMPRESS_METHOD == 'none': - self.send_header('Content-Length', len(CSV_DATA.encode())) + self.send_header("Content-Encoding", COMPRESS_METHOD) + if COMPRESS_METHOD == "none": + self.send_header("Content-Length", len(CSV_DATA.encode())) else: self.compress_data() - self.send_header('Content-Length', len(self.data)) - self.send_header('Content-Type', 'text/csv') + self.send_header("Content-Length", len(self.data)) + self.send_header("Content-Type", "text/csv") self.end_headers() def do_HEAD(self): @@ -99,18 +115,17 @@ class HttpProcessor(SimpleHTTPRequestHandler): return def compress_data(self): - if COMPRESS_METHOD == 'gzip': + if COMPRESS_METHOD == "gzip": 
self.data = gzip.compress((CSV_DATA).encode()) - elif COMPRESS_METHOD == 'lzma': + elif COMPRESS_METHOD == "lzma": self.data = lzma.compress((CSV_DATA).encode()) else: - self.data = 'WRONG CONVERSATION'.encode() - + self.data = "WRONG CONVERSATION".encode() def do_GET(self): self._set_headers() - if COMPRESS_METHOD == 'none': + if COMPRESS_METHOD == "none": self.wfile.write(CSV_DATA.encode()) else: self.wfile.write(self.data) @@ -119,9 +134,11 @@ class HttpProcessor(SimpleHTTPRequestHandler): def log_message(self, format, *args): return + class HTTPServerV6(HTTPServer): address_family = socket.AF_INET6 + def start_server(requests_amount): if IS_IPV6: httpd = HTTPServerV6(HTTP_SERVER_ADDRESS, HttpProcessor) @@ -135,52 +152,60 @@ def start_server(requests_amount): t = threading.Thread(target=real_func) return t + ##################################################################### # Testing area. ##################################################################### -def test_select(dict_name="", schema="word String, counter UInt32", requests=[], answers=[], test_data=""): + +def test_select( + dict_name="", + schema="word String, counter UInt32", + requests=[], + answers=[], + test_data="", +): global ADDING_ENDING global SEND_ENCODING global COMPRESS_METHOD for i in range(len(requests)): if i > 2: - ADDING_ENDING = ENDINGS[i-3] + ADDING_ENDING = ENDINGS[i - 3] SEND_ENCODING = False if dict_name: get_ch_answer("drop dictionary if exists {}".format(dict_name)) - get_ch_answer('''CREATE DICTIONARY {} ({}) + get_ch_answer( + """CREATE DICTIONARY {} ({}) PRIMARY KEY word SOURCE(HTTP(url '{}' format 'CSV')) LAYOUT(complex_key_hashed()) - LIFETIME(0)'''.format(dict_name, schema, HTTP_SERVER_URL_STR + '/test.csv' + ADDING_ENDING)) + LIFETIME(0)""".format( + dict_name, schema, HTTP_SERVER_URL_STR + "/test.csv" + ADDING_ENDING + ) + ) COMPRESS_METHOD = requests[i] print(i, COMPRESS_METHOD, ADDING_ENDING, SEND_ENCODING) check_answers("SELECT * FROM {} ORDER BY word".format(dict_name), answers[i]) + def main(): # first three for encoding, second three for url - insert_requests = [ - 'none', - 'gzip', - 'lzma', - 'gzip', - 'lzma' - ] + insert_requests = ["none", "gzip", "lzma", "gzip", "lzma"] # This answers got experemently in non compressed mode and they are correct - answers = ['''Hello 1\nThis 152\nWorld 2\ndata 555\nis 9283\ntesting 2313213'''] * 5 + answers = ["""Hello 1\nThis 152\nWorld 2\ndata 555\nis 9283\ntesting 2313213"""] * 5 t = start_server(len(insert_requests)) t.start() - test_select(dict_name="test_table_select", requests=insert_requests, answers=answers) + test_select( + dict_name="test_table_select", requests=insert_requests, answers=answers + ) t.join() print("PASSED") - if __name__ == "__main__": try: main() @@ -191,5 +216,3 @@ if __name__ == "__main__": sys.stderr.flush() os._exit(1) - - diff --git a/tests/queries/0_stateless/02010_lc_native.python b/tests/queries/0_stateless/02010_lc_native.python index e6d6f9e1317..09ac60405e7 100755 --- a/tests/queries/0_stateless/02010_lc_native.python +++ b/tests/queries/0_stateless/02010_lc_native.python @@ -5,9 +5,10 @@ import socket import os import uuid -CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', '127.0.0.1') -CLICKHOUSE_PORT = int(os.environ.get('CLICKHOUSE_PORT_TCP', '900000')) -CLICKHOUSE_DATABASE = os.environ.get('CLICKHOUSE_DATABASE', 'default') +CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "127.0.0.1") +CLICKHOUSE_PORT = int(os.environ.get("CLICKHOUSE_PORT_TCP", "900000")) +CLICKHOUSE_DATABASE = 
os.environ.get("CLICKHOUSE_DATABASE", "default") + def writeVarUInt(x, ba): for _ in range(0, 9): @@ -24,12 +25,12 @@ def writeVarUInt(x, ba): def writeStringBinary(s, ba): - b = bytes(s, 'utf-8') + b = bytes(s, "utf-8") writeVarUInt(len(s), ba) ba.extend(b) -def readStrict(s, size = 1): +def readStrict(s, size=1): res = bytearray() while size: cur = s.recv(size) @@ -48,18 +49,23 @@ def readUInt(s, size=1): val += res[i] << (i * 8) return val + def readUInt8(s): return readUInt(s) + def readUInt16(s): return readUInt(s, 2) + def readUInt32(s): return readUInt(s, 4) + def readUInt64(s): return readUInt(s, 8) + def readVarUInt(s): x = 0 for i in range(9): @@ -75,25 +81,25 @@ def readVarUInt(s): def readStringBinary(s): size = readVarUInt(s) s = readStrict(s, size) - return s.decode('utf-8') + return s.decode("utf-8") def sendHello(s): ba = bytearray() - writeVarUInt(0, ba) # Hello - writeStringBinary('simple native protocol', ba) + writeVarUInt(0, ba) # Hello + writeStringBinary("simple native protocol", ba) writeVarUInt(21, ba) writeVarUInt(9, ba) writeVarUInt(54449, ba) - writeStringBinary('default', ba) # database - writeStringBinary('default', ba) # user - writeStringBinary('', ba) # pwd + writeStringBinary("default", ba) # database + writeStringBinary("default", ba) # user + writeStringBinary("", ba) # pwd s.sendall(ba) def receiveHello(s): p_type = readVarUInt(s) - assert (p_type == 0) # Hello + assert p_type == 0 # Hello server_name = readStringBinary(s) # print("Server name: ", server_name) server_version_major = readVarUInt(s) @@ -111,78 +117,79 @@ def receiveHello(s): def serializeClientInfo(ba, query_id): - writeStringBinary('default', ba) # initial_user - writeStringBinary(query_id, ba) # initial_query_id - writeStringBinary('127.0.0.1:9000', ba) # initial_address - ba.extend([0] * 8) # initial_query_start_time_microseconds - ba.append(1) # TCP - writeStringBinary('os_user', ba) # os_user - writeStringBinary('client_hostname', ba) # client_hostname - writeStringBinary('client_name', ba) # client_name + writeStringBinary("default", ba) # initial_user + writeStringBinary(query_id, ba) # initial_query_id + writeStringBinary("127.0.0.1:9000", ba) # initial_address + ba.extend([0] * 8) # initial_query_start_time_microseconds + ba.append(1) # TCP + writeStringBinary("os_user", ba) # os_user + writeStringBinary("client_hostname", ba) # client_hostname + writeStringBinary("client_name", ba) # client_name writeVarUInt(21, ba) writeVarUInt(9, ba) writeVarUInt(54449, ba) - writeStringBinary('', ba) # quota_key - writeVarUInt(0, ba) # distributed_depth - writeVarUInt(1, ba) # client_version_patch - ba.append(0) # No telemetry + writeStringBinary("", ba) # quota_key + writeVarUInt(0, ba) # distributed_depth + writeVarUInt(1, ba) # client_version_patch + ba.append(0) # No telemetry def sendQuery(s, query): ba = bytearray() query_id = uuid.uuid4().hex - writeVarUInt(1, ba) # query + writeVarUInt(1, ba) # query writeStringBinary(query_id, ba) - ba.append(1) # INITIAL_QUERY + ba.append(1) # INITIAL_QUERY # client info serializeClientInfo(ba, query_id) - writeStringBinary('', ba) # No settings - writeStringBinary('', ba) # No interserver secret - writeVarUInt(2, ba) # Stage - Complete - ba.append(0) # No compression - writeStringBinary(query, ba) # query, finally + writeStringBinary("", ba) # No settings + writeStringBinary("", ba) # No interserver secret + writeVarUInt(2, ba) # Stage - Complete + ba.append(0) # No compression + writeStringBinary(query, ba) # query, finally s.sendall(ba) def 
serializeBlockInfo(ba): - writeVarUInt(1, ba) # 1 - ba.append(0) # is_overflows - writeVarUInt(2, ba) # 2 - writeVarUInt(0, ba) # 0 - ba.extend([0] * 4) # bucket_num + writeVarUInt(1, ba) # 1 + ba.append(0) # is_overflows + writeVarUInt(2, ba) # 2 + writeVarUInt(0, ba) # 0 + ba.extend([0] * 4) # bucket_num def sendEmptyBlock(s): ba = bytearray() - writeVarUInt(2, ba) # Data - writeStringBinary('', ba) + writeVarUInt(2, ba) # Data + writeStringBinary("", ba) serializeBlockInfo(ba) - writeVarUInt(0, ba) # rows - writeVarUInt(0, ba) # columns + writeVarUInt(0, ba) # rows + writeVarUInt(0, ba) # columns s.sendall(ba) def assertPacket(packet, expected): - assert(packet == expected), packet + assert packet == expected, packet + def readHeader(s): packet_type = readVarUInt(s) - if packet_type == 2: # Exception + if packet_type == 2: # Exception raise RuntimeError(readException(s)) - assertPacket(packet_type, 1) # Data + assertPacket(packet_type, 1) # Data - readStringBinary(s) # external table name + readStringBinary(s) # external table name # BlockInfo - assertPacket(readVarUInt(s), 1) # 1 - assertPacket(readUInt8(s), 0) # is_overflows - assertPacket(readVarUInt(s), 2) # 2 - assertPacket(readUInt32(s), 4294967295) # bucket_num - assertPacket(readVarUInt(s), 0) # 0 - columns = readVarUInt(s) # rows - rows = readVarUInt(s) # columns + assertPacket(readVarUInt(s), 1) # 1 + assertPacket(readUInt8(s), 0) # is_overflows + assertPacket(readVarUInt(s), 2) # 2 + assertPacket(readUInt32(s), 4294967295) # bucket_num + assertPacket(readVarUInt(s), 0) # 0 + columns = readVarUInt(s) # rows + rows = readVarUInt(s) # columns print("Rows {} Columns {}".format(rows, columns)) for _ in range(columns): col_name = readStringBinary(s) @@ -194,9 +201,9 @@ def readException(s): code = readUInt32(s) name = readStringBinary(s) text = readStringBinary(s) - readStringBinary(s) # trace - assertPacket(readUInt8(s), 0) # has_nested - return "code {}: {}".format(code, text.replace('DB::Exception:', '')) + readStringBinary(s) # trace + assertPacket(readUInt8(s), 0) # has_nested + return "code {}: {}".format(code, text.replace("DB::Exception:", "")) def insertValidLowCardinalityRow(): @@ -205,7 +212,12 @@ def insertValidLowCardinalityRow(): s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) sendHello(s) receiveHello(s) - sendQuery(s, 'insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV'.format(CLICKHOUSE_DATABASE)) + sendQuery( + s, + "insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV".format( + CLICKHOUSE_DATABASE + ), + ) # external tables sendEmptyBlock(s) @@ -213,25 +225,27 @@ def insertValidLowCardinalityRow(): # Data ba = bytearray() - writeVarUInt(2, ba) # Data - writeStringBinary('', ba) + writeVarUInt(2, ba) # Data + writeStringBinary("", ba) serializeBlockInfo(ba) - writeVarUInt(1, ba) # rows - writeVarUInt(1, ba) # columns - writeStringBinary('x', ba) - writeStringBinary('LowCardinality(String)', ba) - ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys - ba.extend([3, 2] + [0] * 6) # indexes type: UInt64 [3], with additional keys [2] - ba.extend([1] + [0] * 7) # num_keys in dict - writeStringBinary('hello', ba) # key - ba.extend([1] + [0] * 7) # num_indexes - ba.extend([0] * 8) # UInt64 index (0 for 'hello') + writeVarUInt(1, ba) # rows + writeVarUInt(1, ba) # columns + writeStringBinary("x", ba) + writeStringBinary("LowCardinality(String)", ba) + ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys + ba.extend( + [3, 2] + [0] * 6 + ) # 
indexes type: UInt64 [3], with additional keys [2] + ba.extend([1] + [0] * 7) # num_keys in dict + writeStringBinary("hello", ba) # key + ba.extend([1] + [0] * 7) # num_indexes + ba.extend([0] * 8) # UInt64 index (0 for 'hello') s.sendall(ba) # Fin block sendEmptyBlock(s) - assertPacket(readVarUInt(s), 5) # End of stream + assertPacket(readVarUInt(s), 5) # End of stream s.close() @@ -241,7 +255,12 @@ def insertLowCardinalityRowWithIndexOverflow(): s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) sendHello(s) receiveHello(s) - sendQuery(s, 'insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV'.format(CLICKHOUSE_DATABASE)) + sendQuery( + s, + "insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV".format( + CLICKHOUSE_DATABASE + ), + ) # external tables sendEmptyBlock(s) @@ -249,19 +268,21 @@ def insertLowCardinalityRowWithIndexOverflow(): # Data ba = bytearray() - writeVarUInt(2, ba) # Data - writeStringBinary('', ba) + writeVarUInt(2, ba) # Data + writeStringBinary("", ba) serializeBlockInfo(ba) - writeVarUInt(1, ba) # rows - writeVarUInt(1, ba) # columns - writeStringBinary('x', ba) - writeStringBinary('LowCardinality(String)', ba) - ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys - ba.extend([3, 2] + [0] * 6) # indexes type: UInt64 [3], with additional keys [2] - ba.extend([1] + [0] * 7) # num_keys in dict - writeStringBinary('hello', ba) # key - ba.extend([1] + [0] * 7) # num_indexes - ba.extend([0] * 7 + [1]) # UInt64 index (overflow) + writeVarUInt(1, ba) # rows + writeVarUInt(1, ba) # columns + writeStringBinary("x", ba) + writeStringBinary("LowCardinality(String)", ba) + ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys + ba.extend( + [3, 2] + [0] * 6 + ) # indexes type: UInt64 [3], with additional keys [2] + ba.extend([1] + [0] * 7) # num_keys in dict + writeStringBinary("hello", ba) # key + ba.extend([1] + [0] * 7) # num_indexes + ba.extend([0] * 7 + [1]) # UInt64 index (overflow) s.sendall(ba) assertPacket(readVarUInt(s), 2) @@ -275,7 +296,12 @@ def insertLowCardinalityRowWithIncorrectDictType(): s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) sendHello(s) receiveHello(s) - sendQuery(s, 'insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV'.format(CLICKHOUSE_DATABASE)) + sendQuery( + s, + "insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV".format( + CLICKHOUSE_DATABASE + ), + ) # external tables sendEmptyBlock(s) @@ -283,32 +309,40 @@ def insertLowCardinalityRowWithIncorrectDictType(): # Data ba = bytearray() - writeVarUInt(2, ba) # Data - writeStringBinary('', ba) + writeVarUInt(2, ba) # Data + writeStringBinary("", ba) serializeBlockInfo(ba) - writeVarUInt(1, ba) # rows - writeVarUInt(1, ba) # columns - writeStringBinary('x', ba) - writeStringBinary('LowCardinality(String)', ba) - ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys - ba.extend([3, 3] + [0] * 6) # indexes type: UInt64 [3], with global dict and add keys [1 + 2] - ba.extend([1] + [0] * 7) # num_keys in dict - writeStringBinary('hello', ba) # key - ba.extend([1] + [0] * 7) # num_indexes - ba.extend([0] * 8) # UInt64 index (overflow) + writeVarUInt(1, ba) # rows + writeVarUInt(1, ba) # columns + writeStringBinary("x", ba) + writeStringBinary("LowCardinality(String)", ba) + ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys + ba.extend( + [3, 3] + [0] * 6 + ) # indexes type: UInt64 [3], with global dict and add keys [1 + 2] + ba.extend([1] + [0] * 7) # 
num_keys in dict + writeStringBinary("hello", ba) # key + ba.extend([1] + [0] * 7) # num_indexes + ba.extend([0] * 8) # UInt64 index (overflow) s.sendall(ba) assertPacket(readVarUInt(s), 2) print(readException(s)) s.close() + def insertLowCardinalityRowWithIncorrectAdditionalKeys(): with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: s.settimeout(30) s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) sendHello(s) receiveHello(s) - sendQuery(s, 'insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV'.format(CLICKHOUSE_DATABASE)) + sendQuery( + s, + "insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV".format( + CLICKHOUSE_DATABASE + ), + ) # external tables sendEmptyBlock(s) @@ -316,30 +350,34 @@ def insertLowCardinalityRowWithIncorrectAdditionalKeys(): # Data ba = bytearray() - writeVarUInt(2, ba) # Data - writeStringBinary('', ba) + writeVarUInt(2, ba) # Data + writeStringBinary("", ba) serializeBlockInfo(ba) - writeVarUInt(1, ba) # rows - writeVarUInt(1, ba) # columns - writeStringBinary('x', ba) - writeStringBinary('LowCardinality(String)', ba) - ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys - ba.extend([3, 0] + [0] * 6) # indexes type: UInt64 [3], with NO additional keys [0] - ba.extend([1] + [0] * 7) # num_keys in dict - writeStringBinary('hello', ba) # key - ba.extend([1] + [0] * 7) # num_indexes - ba.extend([0] * 8) # UInt64 index (0 for 'hello') + writeVarUInt(1, ba) # rows + writeVarUInt(1, ba) # columns + writeStringBinary("x", ba) + writeStringBinary("LowCardinality(String)", ba) + ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys + ba.extend( + [3, 0] + [0] * 6 + ) # indexes type: UInt64 [3], with NO additional keys [0] + ba.extend([1] + [0] * 7) # num_keys in dict + writeStringBinary("hello", ba) # key + ba.extend([1] + [0] * 7) # num_indexes + ba.extend([0] * 8) # UInt64 index (0 for 'hello') s.sendall(ba) assertPacket(readVarUInt(s), 2) print(readException(s)) s.close() + def main(): insertValidLowCardinalityRow() insertLowCardinalityRowWithIndexOverflow() insertLowCardinalityRowWithIncorrectDictType() insertLowCardinalityRowWithIncorrectAdditionalKeys() + if __name__ == "__main__": main() diff --git a/tests/queries/0_stateless/02126_url_auth.python b/tests/queries/0_stateless/02126_url_auth.python index 57b16fb413e..9b2e68a017d 100644 --- a/tests/queries/0_stateless/02126_url_auth.python +++ b/tests/queries/0_stateless/02126_url_auth.python @@ -12,6 +12,7 @@ import subprocess from io import StringIO from http.server import BaseHTTPRequestHandler, HTTPServer + def is_ipv6(host): try: socket.inet_aton(host) @@ -19,6 +20,7 @@ def is_ipv6(host): except: return True + def get_local_port(host, ipv6): if ipv6: family = socket.AF_INET6 @@ -29,8 +31,9 @@ def get_local_port(host, ipv6): fd.bind((host, 0)) return fd.getsockname()[1] -CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', '127.0.0.1') -CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123') + +CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "127.0.0.1") +CLICKHOUSE_PORT_HTTP = os.environ.get("CLICKHOUSE_PORT_HTTP", "8123") ##################################################################################### # This test starts an HTTP server and serves data to clickhouse url-engine based table. 
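# A standalone sketch (not part of the patch, standard library only) of the
# port-discovery idiom get_local_port() above relies on: binding to port 0
# makes the kernel assign an unused port, which getsockname() then reports.
import socket

def free_port(host="127.0.0.1"):
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as fd:
        fd.bind((host, 0))  # port 0: let the OS pick any free port
        return fd.getsockname()[1]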
@@ -39,27 +42,42 @@ CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123') ##################################################################################### # IP-address of this host accessible from the outside world. Get the first one -HTTP_SERVER_HOST = subprocess.check_output(['hostname', '-i']).decode('utf-8').strip().split()[0] +HTTP_SERVER_HOST = ( + subprocess.check_output(["hostname", "-i"]).decode("utf-8").strip().split()[0] +) IS_IPV6 = is_ipv6(HTTP_SERVER_HOST) HTTP_SERVER_PORT = get_local_port(HTTP_SERVER_HOST, IS_IPV6) # IP address and port of the HTTP server started from this script. HTTP_SERVER_ADDRESS = (HTTP_SERVER_HOST, HTTP_SERVER_PORT) if IS_IPV6: - HTTP_SERVER_URL_STR = 'http://' + f'[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}' + "/" + HTTP_SERVER_URL_STR = ( + "http://" + + f"[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}" + + "/" + ) else: - HTTP_SERVER_URL_STR = 'http://' + f'{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}' + "/" + HTTP_SERVER_URL_STR = ( + "http://" + f"{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}" + "/" + ) + +CSV_DATA = os.path.join( + tempfile._get_default_tempdir(), next(tempfile._get_candidate_names()) +) -CSV_DATA = os.path.join(tempfile._get_default_tempdir(), next(tempfile._get_candidate_names())) def get_ch_answer(query): host = CLICKHOUSE_HOST if IS_IPV6: - host = f'[{host}]' + host = f"[{host}]" - url = os.environ.get('CLICKHOUSE_URL', 'http://{host}:{port}'.format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP)) + url = os.environ.get( + "CLICKHOUSE_URL", + "http://{host}:{port}".format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP), + ) return urllib.request.urlopen(url, data=query.encode()).read().decode() + def check_answers(query, answer): ch_answer = get_ch_answer(query) if ch_answer.strip() != answer.strip(): @@ -68,15 +86,16 @@ def check_answers(query, answer): print("Fetched answer :", ch_answer, file=sys.stderr) raise Exception("Fail on query") + class CSVHTTPServer(BaseHTTPRequestHandler): def _set_headers(self): self.send_response(200) - self.send_header('Content-type', 'text/csv') + self.send_header("Content-type", "text/csv") self.end_headers() def do_GET(self): self._set_headers() - self.wfile.write(('hello, world').encode()) + self.wfile.write(("hello, world").encode()) # with open(CSV_DATA, 'r') as fl: # reader = csv.reader(fl, delimiter=',') # for row in reader: @@ -84,33 +103,33 @@ class CSVHTTPServer(BaseHTTPRequestHandler): return def read_chunk(self): - msg = '' + msg = "" while True: sym = self.rfile.read(1) - if sym == '': + if sym == b"": break - msg += sym.decode('utf-8') - if msg.endswith('\r\n'): + msg += sym.decode("utf-8") + if msg.endswith("\r\n"): break length = int(msg[:-2], 16) if length == 0: - return '' + return "" content = self.rfile.read(length) - self.rfile.read(2) # read sep \r\n - return content.decode('utf-8') + self.rfile.read(2)  # read sep \r\n + return content.decode("utf-8") def do_POST(self): - data = '' + data = "" while True: chunk = self.read_chunk() if not chunk: break data += chunk with StringIO(data) as fl: - reader = csv.reader(fl, delimiter=',') - with open(CSV_DATA, 'a') as d: + reader = csv.reader(fl, delimiter=",") + with open(CSV_DATA, "a") as d: for row in reader: - d.write(','.join(row) + '\n') + d.write(",".join(row) + "\n") self._set_headers() self.wfile.write(b"ok") @@ -121,6 +140,7 @@ class CSVHTTPServer(BaseHTTPRequestHandler): class HTTPServerV6(HTTPServer): address_family = socket.AF_INET6
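# A self-contained sketch (not part of the patch) of the server lifecycle that
# start_server() below sets up: serve_forever() blocks on a worker thread and
# shutdown() unblocks it once the queries are done. PingHandler and its
# response body are illustrative stand-ins, not names from the test.
import threading
from http.server import BaseHTTPRequestHandler, HTTPServer

class PingHandler(BaseHTTPRequestHandler):
    def do_GET(self):
        self.send_response(200)
        self.end_headers()
        self.wfile.write(b"ok")

    def log_message(self, fmt, *args):
        return  # keep test output clean

httpd = HTTPServer(("127.0.0.1", 0), PingHandler)  # port 0: any free port
worker = threading.Thread(target=httpd.serve_forever)
worker.start()
# ... issue HTTP requests against httpd.server_address here ...
httpd.shutdown()  # makes serve_forever() return
worker.join()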
+ def start_server(): if IS_IPV6: httpd = HTTPServerV6(HTTP_SERVER_ADDRESS, CSVHTTPServer) @@ -130,57 +150,87 @@ def start_server(): t = threading.Thread(target=httpd.serve_forever) return t, httpd + # test section -def test_select(table_name="", schema="str String,numuint UInt32,numint Int32,double Float64", requests=[], answers=[], test_data=""): - with open(CSV_DATA, 'w') as f: # clear file - f.write('') + +def test_select( + table_name="", + schema="str String,numuint UInt32,numint Int32,double Float64", + requests=[], + answers=[], + test_data="", +): + with open(CSV_DATA, "w") as f: # clear file + f.write("") if test_data: - with open(CSV_DATA, 'w') as f: + with open(CSV_DATA, "w") as f: f.write(test_data + "\n") if table_name: get_ch_answer("drop table if exists {}".format(table_name)) - get_ch_answer("create table {} ({}) engine=URL('{}', 'CSV')".format(table_name, schema, HTTP_SERVER_URL_STR)) + get_ch_answer( + "create table {} ({}) engine=URL('{}', 'CSV')".format( + table_name, schema, HTTP_SERVER_URL_STR + ) + ) for i in range(len(requests)): tbl = table_name if not tbl: - tbl = "url('{addr}', 'CSV', '{schema}')".format(addr=HTTP_SERVER_URL_STR, schema=schema) + tbl = "url('{addr}', 'CSV', '{schema}')".format( + addr=HTTP_SERVER_URL_STR, schema=schema + ) check_answers(requests[i].format(tbl=tbl), answers[i]) if table_name: get_ch_answer("drop table if exists {}".format(table_name)) -def test_insert(table_name="", schema="str String,numuint UInt32,numint Int32,double Float64", requests_insert=[], requests_select=[], answers=[]): - with open(CSV_DATA, 'w') as f: # flush test file - f.write('') + +def test_insert( + table_name="", + schema="str String,numuint UInt32,numint Int32,double Float64", + requests_insert=[], + requests_select=[], + answers=[], +): + with open(CSV_DATA, "w") as f: # flush test file + f.write("") if table_name: get_ch_answer("drop table if exists {}".format(table_name)) - get_ch_answer("create table {} ({}) engine=URL('{}', 'CSV')".format(table_name, schema, HTTP_SERVER_URL_STR)) + get_ch_answer( + "create table {} ({}) engine=URL('{}', 'CSV')".format( + table_name, schema, HTTP_SERVER_URL_STR + ) + ) for req in requests_insert: tbl = table_name if not tbl: - tbl = "table function url('{addr}', 'CSV', '{schema}')".format(addr=HTTP_SERVER_URL_STR, schema=schema) + tbl = "table function url('{addr}', 'CSV', '{schema}')".format( + addr=HTTP_SERVER_URL_STR, schema=schema + ) get_ch_answer(req.format(tbl=tbl)) - for i in range(len(requests_select)): tbl = table_name if not tbl: - tbl = "url('{addr}', 'CSV', '{schema}')".format(addr=HTTP_SERVER_URL_STR, schema=schema) + tbl = "url('{addr}', 'CSV', '{schema}')".format( + addr=HTTP_SERVER_URL_STR, schema=schema + ) check_answers(requests_select[i].format(tbl=tbl), answers[i]) if table_name: get_ch_answer("drop table if exists {}".format(table_name)) + def test_select_url_engine(requests=[], answers=[], test_data=""): for i in range(len(requests)): check_answers(requests[i], answers[i]) + def main(): test_data = "Hello,2,-2,7.7\nWorld,2,-5,8.8" """ @@ -203,19 +253,29 @@ def main(): """ if IS_IPV6: - query = "select * from url('http://guest:guest@" + f'[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}' + "/', 'RawBLOB', 'a String')" + query = ( + "select * from url('http://guest:guest@" + + f"[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}" + + "/', 'RawBLOB', 'a String')" + ) else: - query = "select * from url('http://guest:guest@" + 
f'{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}' + "/', 'RawBLOB', 'a String')" - - + query = ( + "select * from url('http://guest:guest@" + + f"{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}" + + "/', 'RawBLOB', 'a String')" + ) select_requests_url_auth = { - query : 'hello, world', + query: "hello, world", } t, httpd = start_server() t.start() - test_select(requests=list(select_requests_url_auth.keys()), answers=list(select_requests_url_auth.values()), test_data=test_data) + test_select( + requests=list(select_requests_url_auth.keys()), + answers=list(select_requests_url_auth.values()), + test_data=test_data, + ) httpd.shutdown() t.join() print("PASSED") diff --git a/tests/queries/0_stateless/02158_proportions_ztest_cmp.python b/tests/queries/0_stateless/02158_proportions_ztest_cmp.python index d622004db28..0555f8c36ec 100644 --- a/tests/queries/0_stateless/02158_proportions_ztest_cmp.python +++ b/tests/queries/0_stateless/02158_proportions_ztest_cmp.python @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python3 import os import sys from math import sqrt, nan @@ -8,7 +8,7 @@ import pandas as pd import numpy as np CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) from pure_http_client import ClickHouseClient @@ -25,7 +25,7 @@ def twosample_proportion_ztest(s1, s2, t1, t2, alpha): return nan, nan, nan, nan z_stat = (p1 - p2) / se - one_side = 1 - stats.norm.cdf(abs(z_stat)) + one_side = 1 - stats.norm.cdf(abs(z_stat)) p_value = one_side * 2 z = stats.norm.ppf(1 - 0.5 * alpha) @@ -38,71 +38,171 @@ def twosample_proportion_ztest(s1, s2, t1, t2, alpha): def test_and_check(name, z_stat, p_value, ci_lower, ci_upper, precision=1e-2): client = ClickHouseClient() real = client.query_return_df( - "SELECT roundBankers({}.1, 16) as z_stat, ".format(name) + - "roundBankers({}.2, 16) as p_value, ".format(name) + - "roundBankers({}.3, 16) as ci_lower, ".format(name) + - "roundBankers({}.4, 16) as ci_upper ".format(name) + - "FORMAT TabSeparatedWithNames;") - real_z_stat = real['z_stat'][0] - real_p_value = real['p_value'][0] - real_ci_lower = real['ci_lower'][0] - real_ci_upper = real['ci_upper'][0] - assert((np.isnan(real_z_stat) and np.isnan(z_stat)) or abs(real_z_stat - np.float64(z_stat)) < precision), "clickhouse_z_stat {}, py_z_stat {}".format(real_z_stat, z_stat) - assert((np.isnan(real_p_value) and np.isnan(p_value)) or abs(real_p_value - np.float64(p_value)) < precision), "clickhouse_p_value {}, py_p_value {}".format(real_p_value, p_value) - assert((np.isnan(real_ci_lower) and np.isnan(ci_lower)) or abs(real_ci_lower - np.float64(ci_lower)) < precision), "clickhouse_ci_lower {}, py_ci_lower {}".format(real_ci_lower, ci_lower) - assert((np.isnan(real_ci_upper) and np.isnan(ci_upper)) or abs(real_ci_upper - np.float64(ci_upper)) < precision), "clickhouse_ci_upper {}, py_ci_upper {}".format(real_ci_upper, ci_upper) + "SELECT roundBankers({}.1, 16) as z_stat, ".format(name) + + "roundBankers({}.2, 16) as p_value, ".format(name) + + "roundBankers({}.3, 16) as ci_lower, ".format(name) + + "roundBankers({}.4, 16) as ci_upper ".format(name) + + "FORMAT TabSeparatedWithNames;" + ) + real_z_stat = real["z_stat"][0] + real_p_value = real["p_value"][0] + real_ci_lower = real["ci_lower"][0] + real_ci_upper = real["ci_upper"][0] + assert (np.isnan(real_z_stat) and np.isnan(z_stat)) or abs( + real_z_stat - np.float64(z_stat) + ) < precision, "clickhouse_z_stat {}, 
py_z_stat {}".format(real_z_stat, z_stat) + assert (np.isnan(real_p_value) and np.isnan(p_value)) or abs( + real_p_value - np.float64(p_value) + ) < precision, "clickhouse_p_value {}, py_p_value {}".format(real_p_value, p_value) + assert (np.isnan(real_ci_lower) and np.isnan(ci_lower)) or abs( + real_ci_lower - np.float64(ci_lower) + ) < precision, "clickhouse_ci_lower {}, py_ci_lower {}".format( + real_ci_lower, ci_lower + ) + assert (np.isnan(real_ci_upper) and np.isnan(ci_upper)) or abs( + real_ci_upper - np.float64(ci_upper) + ) < precision, "clickhouse_ci_upper {}, py_ci_upper {}".format( + real_ci_upper, ci_upper + ) def test_mean_ztest(): counts = [0, 0] nobs = [0, 0] - z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05) - test_and_check("proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper) - z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(10, 10, 10, 10, 0.05) + z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest( + counts[0], counts[1], nobs[0], nobs[1], 0.05 + ) + test_and_check( + "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" + % (counts[0], counts[1], nobs[0], nobs[1]), + z_stat, + p_value, + ci_lower, + ci_upper, + ) + z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest( + 10, 10, 10, 10, 0.05 + ) counts = [10, 10] nobs = [10, 10] - z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05) - test_and_check("proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper) - z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(10, 10, 10, 10, 0.05) + z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest( + counts[0], counts[1], nobs[0], nobs[1], 0.05 + ) + test_and_check( + "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" + % (counts[0], counts[1], nobs[0], nobs[1]), + z_stat, + p_value, + ci_lower, + ci_upper, + ) + z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest( + 10, 10, 10, 10, 0.05 + ) counts = [16, 16] nobs = [16, 18] - z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05) - test_and_check("proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper) + z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest( + counts[0], counts[1], nobs[0], nobs[1], 0.05 + ) + test_and_check( + "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" + % (counts[0], counts[1], nobs[0], nobs[1]), + z_stat, + p_value, + ci_lower, + ci_upper, + ) counts = [10, 20] nobs = [30, 40] - z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05) - test_and_check("proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper) + z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest( + counts[0], counts[1], nobs[0], nobs[1], 0.05 + ) + test_and_check( + "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" + % (counts[0], counts[1], nobs[0], nobs[1]), + z_stat, + p_value, + ci_lower, + ci_upper, + ) counts = [20, 10] nobs = [40, 30] - z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05) - test_and_check("proportionsZTest(%d, %d, %d, 
%d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper) + z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest( + counts[0], counts[1], nobs[0], nobs[1], 0.05 + ) + test_and_check( + "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" + % (counts[0], counts[1], nobs[0], nobs[1]), + z_stat, + p_value, + ci_lower, + ci_upper, + ) - counts = [randrange(10,20), randrange(10,20)] - nobs = [randrange(counts[0] + 1, counts[0] * 2), randrange(counts[1], counts[1] * 2)] - z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05) - test_and_check("proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper) + counts = [randrange(10, 20), randrange(10, 20)] + nobs = [ + randrange(counts[0] + 1, counts[0] * 2), + randrange(counts[1], counts[1] * 2), + ] + z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest( + counts[0], counts[1], nobs[0], nobs[1], 0.05 + ) + test_and_check( + "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" + % (counts[0], counts[1], nobs[0], nobs[1]), + z_stat, + p_value, + ci_lower, + ci_upper, + ) - counts = [randrange(1,100), randrange(1,200)] + counts = [randrange(1, 100), randrange(1, 200)] nobs = [randrange(counts[0], counts[0] * 2), randrange(counts[1], counts[1] * 3)] - z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05) - test_and_check("proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper) + z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest( + counts[0], counts[1], nobs[0], nobs[1], 0.05 + ) + test_and_check( + "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" + % (counts[0], counts[1], nobs[0], nobs[1]), + z_stat, + p_value, + ci_lower, + ci_upper, + ) - counts = [randrange(1,200), randrange(1,100)] + counts = [randrange(1, 200), randrange(1, 100)] nobs = [randrange(counts[0], counts[0] * 3), randrange(counts[1], counts[1] * 2)] - z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05) - test_and_check("proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper) + z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest( + counts[0], counts[1], nobs[0], nobs[1], 0.05 + ) + test_and_check( + "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" + % (counts[0], counts[1], nobs[0], nobs[1]), + z_stat, + p_value, + ci_lower, + ci_upper, + ) - counts = [randrange(1,1000), randrange(1,1000)] + counts = [randrange(1, 1000), randrange(1, 1000)] nobs = [randrange(counts[0], counts[0] * 2), randrange(counts[1], counts[1] * 2)] - z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05) - test_and_check("proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper) + z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest( + counts[0], counts[1], nobs[0], nobs[1], 0.05 + ) + test_and_check( + "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" + % (counts[0], counts[1], nobs[0], nobs[1]), + z_stat, + p_value, + ci_lower, + ci_upper, + ) if __name__ == "__main__": test_mean_ztest() print("Ok.") - diff --git a/tests/queries/0_stateless/02158_ztest_cmp.python 
b/tests/queries/0_stateless/02158_ztest_cmp.python index 8fc22d78e74..9591a150337 100644 --- a/tests/queries/0_stateless/02158_ztest_cmp.python +++ b/tests/queries/0_stateless/02158_ztest_cmp.python @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python3 import os import sys from statistics import variance @@ -7,7 +7,7 @@ import pandas as pd import numpy as np CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) from pure_http_client import ClickHouseClient @@ -30,46 +30,95 @@ def twosample_mean_ztest(rvs1, rvs2, alpha=0.05): def test_and_check(name, a, b, t_stat, p_value, ci_low, ci_high, precision=1e-2): client = ClickHouseClient() client.query("DROP TABLE IF EXISTS ztest;") - client.query("CREATE TABLE ztest (left Float64, right UInt8) ENGINE = Memory;"); - client.query("INSERT INTO ztest VALUES {};".format(", ".join(['({},{})'.format(i, 0) for i in a]))) - client.query("INSERT INTO ztest VALUES {};".format(", ".join(['({},{})'.format(j, 1) for j in b]))) + client.query("CREATE TABLE ztest (left Float64, right UInt8) ENGINE = Memory;") + client.query( + "INSERT INTO ztest VALUES {};".format( + ", ".join(["({},{})".format(i, 0) for i in a]) + ) + ) + client.query( + "INSERT INTO ztest VALUES {};".format( + ", ".join(["({},{})".format(j, 1) for j in b]) + ) + ) real = client.query_return_df( - "SELECT roundBankers({}(left, right).1, 16) as t_stat, ".format(name) + - "roundBankers({}(left, right).2, 16) as p_value, ".format(name) + - "roundBankers({}(left, right).3, 16) as ci_low, ".format(name) + - "roundBankers({}(left, right).4, 16) as ci_high ".format(name) + - "FROM ztest FORMAT TabSeparatedWithNames;") - real_t_stat = real['t_stat'][0] - real_p_value = real['p_value'][0] - real_ci_low = real['ci_low'][0] - real_ci_high = real['ci_high'][0] - assert(abs(real_t_stat - np.float64(t_stat)) < precision), "clickhouse_t_stat {}, py_t_stat {}".format(real_t_stat, t_stat) - assert(abs(real_p_value - np.float64(p_value)) < precision), "clickhouse_p_value {}, py_p_value {}".format(real_p_value, p_value) - assert(abs(real_ci_low - np.float64(ci_low)) < precision), "clickhouse_ci_low {}, py_ci_low {}".format(real_ci_low, ci_low) - assert(abs(real_ci_high - np.float64(ci_high)) < precision), "clickhouse_ci_high {}, py_ci_high {}".format(real_ci_high, ci_high) + "SELECT roundBankers({}(left, right).1, 16) as t_stat, ".format(name) + + "roundBankers({}(left, right).2, 16) as p_value, ".format(name) + + "roundBankers({}(left, right).3, 16) as ci_low, ".format(name) + + "roundBankers({}(left, right).4, 16) as ci_high ".format(name) + + "FROM ztest FORMAT TabSeparatedWithNames;" + ) + real_t_stat = real["t_stat"][0] + real_p_value = real["p_value"][0] + real_ci_low = real["ci_low"][0] + real_ci_high = real["ci_high"][0] + assert ( + abs(real_t_stat - np.float64(t_stat)) < precision + ), "clickhouse_t_stat {}, py_t_stat {}".format(real_t_stat, t_stat) + assert ( + abs(real_p_value - np.float64(p_value)) < precision + ), "clickhouse_p_value {}, py_p_value {}".format(real_p_value, p_value) + assert ( + abs(real_ci_low - np.float64(ci_low)) < precision + ), "clickhouse_ci_low {}, py_ci_low {}".format(real_ci_low, ci_low) + assert ( + abs(real_ci_high - np.float64(ci_high)) < precision + ), "clickhouse_ci_high {}, py_ci_high {}".format(real_ci_high, ci_high) client.query("DROP TABLE IF EXISTS ztest;") def test_mean_ztest(): - rvs1 = np.round(stats.norm.rvs(loc=1, scale=5,size=500), 2) - rvs2 = 
np.round(stats.norm.rvs(loc=10, scale=5,size=500), 2) + rvs1 = np.round(stats.norm.rvs(loc=1, scale=5, size=500), 2) + rvs2 = np.round(stats.norm.rvs(loc=10, scale=5, size=500), 2) s, p, cl, ch = twosample_mean_ztest(rvs1, rvs2) - test_and_check("meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)), rvs1, rvs2, s, p, cl, ch) + test_and_check( + "meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)), + rvs1, + rvs2, + s, + p, + cl, + ch, + ) - rvs1 = np.round(stats.norm.rvs(loc=0, scale=5,size=500), 2) - rvs2 = np.round(stats.norm.rvs(loc=0, scale=5,size=500), 2) + rvs1 = np.round(stats.norm.rvs(loc=0, scale=5, size=500), 2) + rvs2 = np.round(stats.norm.rvs(loc=0, scale=5, size=500), 2) s, p, cl, ch = twosample_mean_ztest(rvs1, rvs2) - test_and_check("meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)), rvs1, rvs2, s, p, cl, ch) + test_and_check( + "meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)), + rvs1, + rvs2, + s, + p, + cl, + ch, + ) - rvs1 = np.round(stats.norm.rvs(loc=2, scale=10,size=512), 2) - rvs2 = np.round(stats.norm.rvs(loc=5, scale=20,size=1024), 2) + rvs1 = np.round(stats.norm.rvs(loc=2, scale=10, size=512), 2) + rvs2 = np.round(stats.norm.rvs(loc=5, scale=20, size=1024), 2) s, p, cl, ch = twosample_mean_ztest(rvs1, rvs2) - test_and_check("meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)), rvs1, rvs2, s, p, cl, ch) + test_and_check( + "meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)), + rvs1, + rvs2, + s, + p, + cl, + ch, + ) - rvs1 = np.round(stats.norm.rvs(loc=0, scale=10,size=1024), 2) - rvs2 = np.round(stats.norm.rvs(loc=0, scale=10,size=512), 2) + rvs1 = np.round(stats.norm.rvs(loc=0, scale=10, size=1024), 2) + rvs2 = np.round(stats.norm.rvs(loc=0, scale=10, size=512), 2) s, p, cl, ch = twosample_mean_ztest(rvs1, rvs2) - test_and_check("meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)), rvs1, rvs2, s, p, cl, ch) + test_and_check( + "meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)), + rvs1, + rvs2, + s, + p, + cl, + ch, + ) if __name__ == "__main__": diff --git a/tests/queries/0_stateless/02187_async_inserts_all_formats.python b/tests/queries/0_stateless/02187_async_inserts_all_formats.python index 65a323ef9db..fa555c78f8b 100644 --- a/tests/queries/0_stateless/02187_async_inserts_all_formats.python +++ b/tests/queries/0_stateless/02187_async_inserts_all_formats.python @@ -3,47 +3,71 @@ import os import sys CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) -CLICKHOUSE_URL = os.environ.get('CLICKHOUSE_URL') -CLICKHOUSE_TMP = os.environ.get('CLICKHOUSE_TMP') +CLICKHOUSE_URL = os.environ.get("CLICKHOUSE_URL") +CLICKHOUSE_TMP = os.environ.get("CLICKHOUSE_TMP") from pure_http_client import ClickHouseClient client = ClickHouseClient() + def run_test(data_format, gen_data_template, settings): print(data_format) client.query("TRUNCATE TABLE t_async_insert") expected = client.query(gen_data_template.format("TSV")).strip() - data = client.query(gen_data_template.format(data_format), settings=settings,binary_result=True) + data = client.query( + gen_data_template.format(data_format), settings=settings, binary_result=True + ) insert_query = "INSERT INTO t_async_insert FORMAT {}".format(data_format) client.query_with_data(insert_query, data, settings=settings) result = client.query("SELECT * FROM t_async_insert FORMAT TSV").strip() if result != expected: - print("Failed for format 
{}.\nExpected:\n{}\nGot:\n{}\n".format(data_format, expected, result)) + print( + "Failed for format {}.\nExpected:\n{}\nGot:\n{}\n".format( + data_format, expected, result + ) + ) exit(1) -formats = client.query("SELECT name FROM system.formats WHERE is_input AND is_output \ - AND name NOT IN ('CapnProto', 'RawBLOB', 'Template', 'ProtobufSingle', 'LineAsString', 'Protobuf', 'ProtobufList') ORDER BY name").strip().split('\n') + +formats = ( + client.query( + "SELECT name FROM system.formats WHERE is_input AND is_output \ + AND name NOT IN ('CapnProto', 'RawBLOB', 'Template', 'ProtobufSingle', 'LineAsString', 'Protobuf', 'ProtobufList') ORDER BY name" + ) + .strip() + .split("\n") +) # Generic formats client.query("DROP TABLE IF EXISTS t_async_insert") -client.query("CREATE TABLE t_async_insert (id UInt64, s String, arr Array(UInt64)) ENGINE = Memory") +client.query( + "CREATE TABLE t_async_insert (id UInt64, s String, arr Array(UInt64)) ENGINE = Memory" +) gen_data_query = "SELECT number AS id, toString(number) AS s, range(number) AS arr FROM numbers(10) FORMAT {}" for data_format in formats: - run_test(data_format, gen_data_query, settings={"async_insert": 1, "wait_for_async_insert": 1}) + run_test( + data_format, + gen_data_query, + settings={"async_insert": 1, "wait_for_async_insert": 1}, + ) # LineAsString client.query("DROP TABLE IF EXISTS t_async_insert") client.query("CREATE TABLE t_async_insert (s String) ENGINE = Memory") gen_data_query = "SELECT toString(number) AS s FROM numbers(10) FORMAT {}" -run_test('LineAsString', gen_data_query, settings={"async_insert": 1, "wait_for_async_insert": 1}) +run_test( + "LineAsString", + gen_data_query, + settings={"async_insert": 1, "wait_for_async_insert": 1}, +) # TODO: add CapnProto and Protobuf diff --git a/tests/queries/0_stateless/02205_HTTP_user_agent.python b/tests/queries/0_stateless/02205_HTTP_user_agent.python index 0d3a563c094..5787ae186ab 100644 --- a/tests/queries/0_stateless/02205_HTTP_user_agent.python +++ b/tests/queries/0_stateless/02205_HTTP_user_agent.python @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -from http.server import SimpleHTTPRequestHandler,HTTPServer +from http.server import SimpleHTTPRequestHandler, HTTPServer import socket import sys import threading @@ -17,6 +17,7 @@ def is_ipv6(host): except: return True + def get_local_port(host, ipv6): if ipv6: family = socket.AF_INET6 @@ -27,20 +28,19 @@ def get_local_port(host, ipv6): fd.bind((host, 0)) return fd.getsockname()[1] -CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', 'localhost') -CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123') + +CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "localhost") +CLICKHOUSE_PORT_HTTP = os.environ.get("CLICKHOUSE_PORT_HTTP", "8123") # Server returns this JSON response. -SERVER_JSON_RESPONSE = \ -'''{ +SERVER_JSON_RESPONSE = """{ "login": "ClickHouse", "id": 54801242, "name": "ClickHouse", "company": null -}''' +}""" -EXPECTED_ANSWER = \ -'''{\\n\\t"login": "ClickHouse",\\n\\t"id": 54801242,\\n\\t"name": "ClickHouse",\\n\\t"company": null\\n}''' +EXPECTED_ANSWER = """{\\n\\t"login": "ClickHouse",\\n\\t"id": 54801242,\\n\\t"name": "ClickHouse",\\n\\t"company": null\\n}""" ##################################################################################### # This test starts an HTTP server and serves data to clickhouse url-engine based table. 
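# A tiny sketch (not part of the patch) of the predicate this test is built
# around: the mock server below replies 200 only when the User-Agent header
# identifies a ClickHouse client, and 403 otherwise. The header strings here
# are illustrative examples.
def is_clickhouse_user_agent(user_agent):
    return bool(user_agent) and user_agent.startswith("ClickHouse/")

assert is_clickhouse_user_agent("ClickHouse/21.9.1.1")
assert not is_clickhouse_user_agent("curl/7.81.0")
assert not is_clickhouse_user_agent(None)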
@@ -51,26 +51,38 @@ EXPECTED_ANSWER = \ ##################################################################################### # IP-address of this host accessible from the outside world. Get the first one -HTTP_SERVER_HOST = subprocess.check_output(['hostname', '-i']).decode('utf-8').strip().split()[0] +HTTP_SERVER_HOST = ( + subprocess.check_output(["hostname", "-i"]).decode("utf-8").strip().split()[0] +) IS_IPV6 = is_ipv6(HTTP_SERVER_HOST) HTTP_SERVER_PORT = get_local_port(HTTP_SERVER_HOST, IS_IPV6) # IP address and port of the HTTP server started from this script. HTTP_SERVER_ADDRESS = (HTTP_SERVER_HOST, HTTP_SERVER_PORT) if IS_IPV6: - HTTP_SERVER_URL_STR = 'http://' + f'[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}' + "/" + HTTP_SERVER_URL_STR = ( + "http://" + + f"[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}" + + "/" + ) else: - HTTP_SERVER_URL_STR = 'http://' + f'{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}' + "/" + HTTP_SERVER_URL_STR = ( + "http://" + f"{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}" + "/" + ) def get_ch_answer(query): host = CLICKHOUSE_HOST if IS_IPV6: - host = f'[{host}]' + host = f"[{host}]" - url = os.environ.get('CLICKHOUSE_URL', 'http://{host}:{port}'.format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP)) + url = os.environ.get( + "CLICKHOUSE_URL", + "http://{host}:{port}".format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP), + ) return urllib.request.urlopen(url, data=query.encode()).read().decode() + def check_answers(query, answer): ch_answer = get_ch_answer(query) if ch_answer.strip() != answer.strip(): @@ -79,16 +91,17 @@ def check_answers(query, answer): print("Fetched answer :", ch_answer, file=sys.stderr) raise Exception("Fail on query") + # Server with check for User-Agent headers. class HttpProcessor(SimpleHTTPRequestHandler): def _set_headers(self): - user_agent = self.headers.get('User-Agent') - if user_agent and user_agent.startswith('ClickHouse/'): + user_agent = self.headers.get("User-Agent") + if user_agent and user_agent.startswith("ClickHouse/"): self.send_response(200) else: self.send_response(403) - self.send_header('Content-Type', 'text/csv') + self.send_header("Content-Type", "text/csv") self.end_headers() def do_GET(self): @@ -98,9 +111,11 @@ class HttpProcessor(SimpleHTTPRequestHandler): def log_message(self, format, *args): return + class HTTPServerV6(HTTPServer): address_family = socket.AF_INET6 + def start_server(requests_amount): if IS_IPV6: httpd = HTTPServerV6(HTTP_SERVER_ADDRESS, HttpProcessor) @@ -114,15 +129,18 @@ def start_server(requests_amount): t = threading.Thread(target=real_func) return t + ##################################################################### # Testing area. 
##################################################################### + def test_select(): global HTTP_SERVER_URL_STR - query = 'SELECT * FROM url(\'{}\',\'JSONAsString\');'.format(HTTP_SERVER_URL_STR) + query = "SELECT * FROM url('{}','JSONAsString');".format(HTTP_SERVER_URL_STR) check_answers(query, EXPECTED_ANSWER) + def main(): # HEAD + GET t = start_server(3) @@ -131,6 +149,7 @@ def main(): t.join() print("PASSED") + if __name__ == "__main__": try: main() @@ -141,4 +160,3 @@ if __name__ == "__main__": sys.stderr.flush() os._exit(1) - diff --git a/tests/queries/0_stateless/02233_HTTP_ranged.python b/tests/queries/0_stateless/02233_HTTP_ranged.python index e74d494edf5..66ef3304098 100644 --- a/tests/queries/0_stateless/02233_HTTP_ranged.python +++ b/tests/queries/0_stateless/02233_HTTP_ranged.python @@ -122,7 +122,7 @@ class HttpProcessor(BaseHTTPRequestHandler): get_call_num = 0 responses_to_get = [] - def send_head(self, from_get = False): + def send_head(self, from_get=False): if self.headers["Range"] and HttpProcessor.allow_range: try: self.range = parse_byte_range(self.headers["Range"]) @@ -146,7 +146,9 @@ class HttpProcessor(BaseHTTPRequestHandler): self.send_error(416, "Requested Range Not Satisfiable") return None - retry_range_request = first != 0 and from_get is True and len(HttpProcessor.responses_to_get) > 0 + retry_range_request = ( + first != 0 and from_get is True and len(HttpProcessor.responses_to_get) > 0 + ) if retry_range_request: code = HttpProcessor.responses_to_get.pop() if code not in HttpProcessor.responses: @@ -244,7 +246,9 @@ def run_test(allow_range, settings, check_retries=False): raise Exception("HTTP Range was not used when supported") if check_retries and len(HttpProcessor.responses_to_get) > 0: - raise Exception("Expected to get http response 500, which had to be retried, but 200 ok returned and then retried") + raise Exception( + "Expected to get http response 500, which had to be retried, but 200 ok returned and then retried" + ) if retries_num > 0: expected_get_call_num += retries_num - 1 @@ -263,7 +267,7 @@ def run_test(allow_range, settings, check_retries=False): def main(): - settings = {"max_download_buffer_size" : 20} + settings = {"max_download_buffer_size": 20} # Test Accept-Ranges=False run_test(allow_range=False, settings=settings) @@ -271,7 +275,7 @@ def main(): run_test(allow_range=True, settings=settings) # Test Accept-Ranges=True, parallel download is used - settings = {"max_download_buffer_size" : 10} + settings = {"max_download_buffer_size": 10} run_test(allow_range=True, settings=settings) # Test Accept-Ranges=True, parallel download is not used, diff --git a/tests/queries/0_stateless/02294_anova_cmp.python b/tests/queries/0_stateless/02294_anova_cmp.python index 7597b3712d1..2212a887b2f 100644 --- a/tests/queries/0_stateless/02294_anova_cmp.python +++ b/tests/queries/0_stateless/02294_anova_cmp.python @@ -7,7 +7,7 @@ import pandas as pd import numpy as np CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) from pure_http_client import ClickHouseClient @@ -22,15 +22,22 @@ def test_and_check(rvs, n_groups, f_stat, p_value, precision=1e-2): client.query("DROP TABLE IF EXISTS anova;") client.query("CREATE TABLE anova (left Float64, right UInt64) ENGINE = Memory;") for group in range(n_groups): - client.query(f'''INSERT INTO anova VALUES {", ".join([f'({i},{group})' for i in rvs[group]])};''') + client.query( + f"""INSERT INTO anova 
VALUES {", ".join([f'({i},{group})' for i in rvs[group]])};""" + ) real = client.query_return_df( - '''SELECT roundBankers(a.1, 16) as f_stat, roundBankers(a.2, 16) as p_value FROM (SELECT anova(left, right) as a FROM anova) FORMAT TabSeparatedWithNames;''') + """SELECT roundBankers(a.1, 16) as f_stat, roundBankers(a.2, 16) as p_value FROM (SELECT anova(left, right) as a FROM anova) FORMAT TabSeparatedWithNames;""" + ) - real_f_stat = real['f_stat'][0] - real_p_value = real['p_value'][0] - assert(abs(real_f_stat - np.float64(f_stat)) < precision), f"clickhouse_f_stat {real_f_stat}, py_f_stat {f_stat}" - assert(abs(real_p_value - np.float64(p_value)) < precision), f"clickhouse_p_value {real_p_value}, py_p_value {p_value}" + real_f_stat = real["f_stat"][0] + real_p_value = real["p_value"][0] + assert ( + abs(real_f_stat - np.float64(f_stat)) < precision + ), f"clickhouse_f_stat {real_f_stat}, py_f_stat {f_stat}" + assert ( + abs(real_p_value - np.float64(p_value)) < precision + ), f"clickhouse_p_value {real_p_value}, py_p_value {p_value}" client.query("DROP TABLE IF EXISTS anova;") diff --git a/tests/queries/0_stateless/02346_read_in_order_fixed_prefix.python b/tests/queries/0_stateless/02346_read_in_order_fixed_prefix.python index 399533480a9..7f52daeb408 100644 --- a/tests/queries/0_stateless/02346_read_in_order_fixed_prefix.python +++ b/tests/queries/0_stateless/02346_read_in_order_fixed_prefix.python @@ -123,10 +123,14 @@ Uses FinishSortingTransform: {} for query in queries: check_query(query["where"], query["order_by"], query["optimize"], False) - check_query(query["where"], query["order_by"] + ["e"], query["optimize"], query["optimize"]) + check_query( + query["where"], query["order_by"] + ["e"], query["optimize"], query["optimize"] + ) where_columns = [f"bitNot({col})" for col in query["where"]] check_query(where_columns, query["order_by"], query["optimize"], False) - check_query(where_columns, query["order_by"] + ["e"], query["optimize"], query["optimize"]) + check_query( + where_columns, query["order_by"] + ["e"], query["optimize"], query["optimize"] + ) print("OK") diff --git a/tests/queries/0_stateless/02403_big_http_chunk_size.python b/tests/queries/0_stateless/02403_big_http_chunk_size.python index 4e2e97e487b..4d2f01db55b 100644 --- a/tests/queries/0_stateless/02403_big_http_chunk_size.python +++ b/tests/queries/0_stateless/02403_big_http_chunk_size.python @@ -8,8 +8,8 @@ TRANSFER_ENCODING_HEADER = "Transfer-Encoding" def main(): - host = os.environ['CLICKHOUSE_HOST'] - port = int(os.environ['CLICKHOUSE_PORT_HTTP']) + host = os.environ["CLICKHOUSE_HOST"] + port = int(os.environ["CLICKHOUSE_PORT_HTTP"]) sock = socket(AF_INET, SOCK_STREAM) sock.connect((host, port)) @@ -47,4 +47,3 @@ def main(): if __name__ == "__main__": main() - diff --git a/tests/queries/0_stateless/02458_insert_select_progress_tcp.python b/tests/queries/0_stateless/02458_insert_select_progress_tcp.python index c638b3d2040..9d64201afd9 100644 --- a/tests/queries/0_stateless/02458_insert_select_progress_tcp.python +++ b/tests/queries/0_stateless/02458_insert_select_progress_tcp.python @@ -5,9 +5,10 @@ import os import uuid import json -CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', '127.0.0.1') -CLICKHOUSE_PORT = int(os.environ.get('CLICKHOUSE_PORT_TCP', '900000')) -CLICKHOUSE_DATABASE = os.environ.get('CLICKHOUSE_DATABASE', 'default') +CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "127.0.0.1") +CLICKHOUSE_PORT = int(os.environ.get("CLICKHOUSE_PORT_TCP", "900000")) +CLICKHOUSE_DATABASE = 
os.environ.get("CLICKHOUSE_DATABASE", "default") + def writeVarUInt(x, ba): for _ in range(0, 9): @@ -24,12 +25,12 @@ def writeVarUInt(x, ba): def writeStringBinary(s, ba): - b = bytes(s, 'utf-8') + b = bytes(s, "utf-8") writeVarUInt(len(s), ba) ba.extend(b) -def readStrict(s, size = 1): +def readStrict(s, size=1): res = bytearray() while size: cur = s.recv(size) @@ -48,18 +49,23 @@ def readUInt(s, size=1): val += res[i] << (i * 8) return val + def readUInt8(s): return readUInt(s) + def readUInt16(s): return readUInt(s, 2) + def readUInt32(s): return readUInt(s, 4) + def readUInt64(s): return readUInt(s, 8) + def readVarUInt(s): x = 0 for i in range(9): @@ -75,25 +81,25 @@ def readVarUInt(s): def readStringBinary(s): size = readVarUInt(s) s = readStrict(s, size) - return s.decode('utf-8') + return s.decode("utf-8") def sendHello(s): ba = bytearray() - writeVarUInt(0, ba) # Hello - writeStringBinary('simple native protocol', ba) + writeVarUInt(0, ba) # Hello + writeStringBinary("simple native protocol", ba) writeVarUInt(21, ba) writeVarUInt(9, ba) writeVarUInt(54449, ba) - writeStringBinary(CLICKHOUSE_DATABASE, ba) # database - writeStringBinary('default', ba) # user - writeStringBinary('', ba) # pwd + writeStringBinary(CLICKHOUSE_DATABASE, ba) # database + writeStringBinary("default", ba) # user + writeStringBinary("", ba) # pwd s.sendall(ba) def receiveHello(s): p_type = readVarUInt(s) - assert (p_type == 0) # Hello + assert p_type == 0 # Hello server_name = readStringBinary(s) # print("Server name: ", server_name) server_version_major = readVarUInt(s) @@ -111,65 +117,65 @@ def receiveHello(s): def serializeClientInfo(ba, query_id): - writeStringBinary('default', ba) # initial_user - writeStringBinary(query_id, ba) # initial_query_id - writeStringBinary('127.0.0.1:9000', ba) # initial_address - ba.extend([0] * 8) # initial_query_start_time_microseconds - ba.append(1) # TCP - writeStringBinary('os_user', ba) # os_user - writeStringBinary('client_hostname', ba) # client_hostname - writeStringBinary('client_name', ba) # client_name + writeStringBinary("default", ba) # initial_user + writeStringBinary(query_id, ba) # initial_query_id + writeStringBinary("127.0.0.1:9000", ba) # initial_address + ba.extend([0] * 8) # initial_query_start_time_microseconds + ba.append(1) # TCP + writeStringBinary("os_user", ba) # os_user + writeStringBinary("client_hostname", ba) # client_hostname + writeStringBinary("client_name", ba) # client_name writeVarUInt(21, ba) writeVarUInt(9, ba) writeVarUInt(54449, ba) - writeStringBinary('', ba) # quota_key - writeVarUInt(0, ba) # distributed_depth - writeVarUInt(1, ba) # client_version_patch - ba.append(0) # No telemetry + writeStringBinary("", ba) # quota_key + writeVarUInt(0, ba) # distributed_depth + writeVarUInt(1, ba) # client_version_patch + ba.append(0) # No telemetry def sendQuery(s, query): ba = bytearray() query_id = uuid.uuid4().hex - writeVarUInt(1, ba) # query + writeVarUInt(1, ba) # query writeStringBinary(query_id, ba) - ba.append(1) # INITIAL_QUERY + ba.append(1) # INITIAL_QUERY # client info serializeClientInfo(ba, query_id) - writeStringBinary('', ba) # No settings - writeStringBinary('', ba) # No interserver secret - writeVarUInt(2, ba) # Stage - Complete - ba.append(0) # No compression - writeStringBinary(query, ba) # query, finally + writeStringBinary("", ba) # No settings + writeStringBinary("", ba) # No interserver secret + writeVarUInt(2, ba) # Stage - Complete + ba.append(0) # No compression + writeStringBinary(query, ba) # query, finally 
s.sendall(ba) def serializeBlockInfo(ba): - writeVarUInt(1, ba) # 1 - ba.append(0) # is_overflows - writeVarUInt(2, ba) # 2 - writeVarUInt(0, ba) # 0 - ba.extend([0] * 4) # bucket_num + writeVarUInt(1, ba) # 1 + ba.append(0) # is_overflows + writeVarUInt(2, ba) # 2 + writeVarUInt(0, ba) # 0 + ba.extend([0] * 4) # bucket_num def sendEmptyBlock(s): ba = bytearray() - writeVarUInt(2, ba) # Data - writeStringBinary('', ba) + writeVarUInt(2, ba) # Data + writeStringBinary("", ba) serializeBlockInfo(ba) - writeVarUInt(0, ba) # rows - writeVarUInt(0, ba) # columns + writeVarUInt(0, ba) # rows + writeVarUInt(0, ba) # columns s.sendall(ba) def assertPacket(packet, expected): - assert(packet == expected), packet + assert packet == expected, packet -class Progress(): +class Progress: def __init__(self): # NOTE: this is done in ctor to initialize __dict__ self.read_rows = 0 @@ -198,11 +204,12 @@ class Progress(): def __bool__(self): return ( - self.read_rows > 0 or - self.read_bytes > 0 or - self.total_rows_to_read > 0 or - self.written_rows > 0 or - self.written_bytes > 0) + self.read_rows > 0 + or self.read_bytes > 0 + or self.total_rows_to_read > 0 + or self.written_rows > 0 + or self.written_bytes > 0 + ) def readProgress(s): @@ -219,13 +226,14 @@ def readProgress(s): progress.readPacket(s) return progress + def readException(s): code = readUInt32(s) name = readStringBinary(s) text = readStringBinary(s) - readStringBinary(s) # trace - assertPacket(readUInt8(s), 0) # has_nested - return "code {}: {}".format(code, text.replace('DB::Exception:', '')) + readStringBinary(s) # trace + assertPacket(readUInt8(s), 0) # has_nested + return "code {}: {}".format(code, text.replace("DB::Exception:", "")) def main(): @@ -236,7 +244,10 @@ def main(): receiveHello(s) # For 1 second sleep and 1000ms of interactive_delay we definitely should have a non-zero progress packet. # NOTE: interactive_delay=0 cannot be used since in this case CompletedPipelineExecutor will not call cancelled callback.
- sendQuery(s, "insert into function null('_ Int') select sleep(1) from numbers(2) settings max_block_size=1, interactive_delay=1000") + sendQuery( + s, + "insert into function null('_ Int') select sleep(1) from numbers(2) settings max_block_size=1, interactive_delay=1000", + ) # external tables sendEmptyBlock(s) diff --git a/tests/queries/0_stateless/02473_multistep_prewhere.python b/tests/queries/0_stateless/02473_multistep_prewhere.python index 37a7280dac2..a942568233c 100644 --- a/tests/queries/0_stateless/02473_multistep_prewhere.python +++ b/tests/queries/0_stateless/02473_multistep_prewhere.python @@ -4,18 +4,19 @@ import os import sys CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) from pure_http_client import ClickHouseClient class Tester: - ''' + """ - Creates test table - Deletes the specified range of rows - Masks another range using row-level policy - Runs some read queries and checks that the results - ''' + """ + def __init__(self, session, url, index_granularity, total_rows): self.session = session self.url = url @@ -25,10 +26,10 @@ class Tester: self.repro_queries = [] def report_error(self): - print('Repro steps:', '\n\n\t'.join(self.repro_queries)) + print("Repro steps:", "\n\n\t".join(self.repro_queries)) exit(1) - def query(self, query_text, include_in_repro_steps = True, expected_data = None): + def query(self, query_text, include_in_repro_steps=True, expected_data=None): self.repro_queries.append(query_text) resp = self.session.post(self.url, data=query_text) if resp.status_code != 200: @@ -36,113 +37,187 @@ class Tester: error = resp.text[0:40] if error not in self.reported_errors: self.reported_errors.add(error) - print('Code:', resp.status_code) - print('Result:', resp.text) + print("Code:", resp.status_code) + print("Result:", resp.text) self.report_error() result = resp.text # Check that the result is as expected - if ((not expected_data is None) and (int(result) != len(expected_data))): - print('Expected {} rows, got {}'.format(len(expected_data), result)) - print('Expected data:' + str(expected_data)) + if (not expected_data is None) and (int(result) != len(expected_data)): + print("Expected {} rows, got {}".format(len(expected_data), result)) + print("Expected data:" + str(expected_data)) self.report_error() if not include_in_repro_steps: self.repro_queries.pop() - - def check_data(self, all_data, delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end): + def check_data( + self, + all_data, + delete_range_start, + delete_range_end, + row_level_policy_range_start, + row_level_policy_range_end, + ): all_data_after_delete = all_data[ - ~((all_data.a == 0) & - (all_data.b > delete_range_start) & - (all_data.b <= delete_range_end))] + ~( + (all_data.a == 0) + & (all_data.b > delete_range_start) + & (all_data.b <= delete_range_end) + ) + ] all_data_after_row_policy = all_data_after_delete[ - (all_data_after_delete.b <= row_level_policy_range_start) | - (all_data_after_delete.b > row_level_policy_range_end)] + (all_data_after_delete.b <= row_level_policy_range_start) + | (all_data_after_delete.b > row_level_policy_range_end) + ] - for to_select in ['count()', 'sum(d)']: # Test reading with and without column with default value - self.query('SELECT {} FROM tab_02473;'.format(to_select), False, all_data_after_row_policy) + for to_select in [ + "count()", + "sum(d)", + ]: # Test reading with and without column with 
default value + self.query( + "SELECT {} FROM tab_02473;".format(to_select), + False, + all_data_after_row_policy, + ) delta = 10 for query_range_start in [0, delta]: - for query_range_end in [self.total_rows - delta]: #, self.total_rows]: + for query_range_end in [self.total_rows - delta]: # , self.total_rows]: expected = all_data_after_row_policy[ - (all_data_after_row_policy.a == 0) & - (all_data_after_row_policy.b > query_range_start) & - (all_data_after_row_policy.b <= query_range_end)] - self.query('SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} WHERE a == 0;'.format( - to_select, query_range_start, query_range_end), False, expected) + (all_data_after_row_policy.a == 0) + & (all_data_after_row_policy.b > query_range_start) + & (all_data_after_row_policy.b <= query_range_end) + ] + self.query( + "SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} WHERE a == 0;".format( + to_select, query_range_start, query_range_end + ), + False, + expected, + ) expected = all_data_after_row_policy[ - (all_data_after_row_policy.a == 0) & - (all_data_after_row_policy.c > query_range_start) & - (all_data_after_row_policy.c <= query_range_end)] - self.query('SELECT {} from tab_02473 PREWHERE c > {} AND c <= {} WHERE a == 0;'.format( - to_select, query_range_start, query_range_end), False, expected) + (all_data_after_row_policy.a == 0) + & (all_data_after_row_policy.c > query_range_start) + & (all_data_after_row_policy.c <= query_range_end) + ] + self.query( + "SELECT {} from tab_02473 PREWHERE c > {} AND c <= {} WHERE a == 0;".format( + to_select, query_range_start, query_range_end + ), + False, + expected, + ) expected = all_data_after_row_policy[ - (all_data_after_row_policy.a == 0) & - ((all_data_after_row_policy.c <= query_range_start) | - (all_data_after_row_policy.c > query_range_end))] - self.query('SELECT {} from tab_02473 PREWHERE c <= {} OR c > {} WHERE a == 0;'.format( - to_select, query_range_start, query_range_end), False, expected) + (all_data_after_row_policy.a == 0) + & ( + (all_data_after_row_policy.c <= query_range_start) + | (all_data_after_row_policy.c > query_range_end) + ) + ] + self.query( + "SELECT {} from tab_02473 PREWHERE c <= {} OR c > {} WHERE a == 0;".format( + to_select, query_range_start, query_range_end + ), + False, + expected, + ) - - def run_test(self, delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end): + def run_test( + self, + delete_range_start, + delete_range_end, + row_level_policy_range_start, + row_level_policy_range_end, + ): self.repro_queries = [] - self.query(''' + self.query( + """ CREATE TABLE tab_02473 (a Int8, b Int32, c Int32, PRIMARY KEY (a)) ENGINE = MergeTree() ORDER BY (a, b) - SETTINGS min_bytes_for_wide_part = 0, index_granularity = {};'''.format(self.index_granularity)) + SETTINGS min_bytes_for_wide_part = 0, index_granularity = {};""".format( + self.index_granularity + ) + ) - self.query('INSERT INTO tab_02473 select 0, number+1, number+1 FROM numbers({});'.format(self.total_rows)) + self.query( + "INSERT INTO tab_02473 select 0, number+1, number+1 FROM numbers({});".format( + self.total_rows + ) + ) client = ClickHouseClient() - all_data = client.query_return_df("SELECT a, b, c, 1 as d FROM tab_02473 FORMAT TabSeparatedWithNames;") + all_data = client.query_return_df( + "SELECT a, b, c, 1 as d FROM tab_02473 FORMAT TabSeparatedWithNames;" + ) - self.query('OPTIMIZE TABLE tab_02473 FINAL SETTINGS mutations_sync=2;') + self.query("OPTIMIZE TABLE tab_02473 FINAL SETTINGS mutations_sync=2;") # 
After all data has been written add a column with default value - self.query('ALTER TABLE tab_02473 ADD COLUMN d Int64 DEFAULT 1;') + self.query("ALTER TABLE tab_02473 ADD COLUMN d Int64 DEFAULT 1;") self.check_data(all_data, -100, -100, -100, -100) - self.query('DELETE FROM tab_02473 WHERE a = 0 AND b > {} AND b <= {};'.format( - delete_range_start, delete_range_end)) + self.query( + "DELETE FROM tab_02473 WHERE a = 0 AND b > {} AND b <= {};".format( + delete_range_start, delete_range_end + ) + ) self.check_data(all_data, delete_range_start, delete_range_end, -100, -100) - self.query('CREATE ROW POLICY policy_tab_02473 ON tab_02473 FOR SELECT USING b <= {} OR b > {} TO default;'.format( - row_level_policy_range_start, row_level_policy_range_end)) + self.query( + "CREATE ROW POLICY policy_tab_02473 ON tab_02473 FOR SELECT USING b <= {} OR b > {} TO default;".format( + row_level_policy_range_start, row_level_policy_range_end + ) + ) - self.check_data(all_data, delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end) + self.check_data( + all_data, + delete_range_start, + delete_range_end, + row_level_policy_range_start, + row_level_policy_range_end, + ) - self.query('DROP POLICY policy_tab_02473 ON tab_02473;') - - self.query('DROP TABLE tab_02473;') + self.query("DROP POLICY policy_tab_02473 ON tab_02473;") + self.query("DROP TABLE tab_02473;") def main(): # Set mutations to synchronous mode and enable lightweight DELETE's - url = os.environ['CLICKHOUSE_URL'] + '&max_threads=1' + url = os.environ["CLICKHOUSE_URL"] + "&max_threads=1" - default_index_granularity = 10; + default_index_granularity = 10 total_rows = 8 * default_index_granularity step = default_index_granularity session = requests.Session() - for index_granularity in [default_index_granularity-1, default_index_granularity]: # [default_index_granularity-1, default_index_granularity+1, default_index_granularity]: + for index_granularity in [ + default_index_granularity - 1, + default_index_granularity, + ]: # [default_index_granularity-1, default_index_granularity+1, default_index_granularity]: tester = Tester(session, url, index_granularity, total_rows) # Test combinations of ranges of various size masked by lightweight DELETES # along with ranges of various size masked by row-level policies for delete_range_start in range(0, total_rows, 3 * step): - for delete_range_end in range(delete_range_start + 3 * step, total_rows, 2 * step): + for delete_range_end in range( + delete_range_start + 3 * step, total_rows, 2 * step + ): for row_level_policy_range_start in range(0, total_rows, 3 * step): - for row_level_policy_range_end in range(row_level_policy_range_start + 3 * step, total_rows, 2 * step): - tester.run_test(delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end) + for row_level_policy_range_end in range( + row_level_policy_range_start + 3 * step, total_rows, 2 * step + ): + tester.run_test( + delete_range_start, + delete_range_end, + row_level_policy_range_start, + row_level_policy_range_end, + ) if __name__ == "__main__": main() - diff --git a/tests/queries/0_stateless/02473_multistep_split_prewhere.python b/tests/queries/0_stateless/02473_multistep_split_prewhere.python index 41d8a746e11..19444994fd2 100644 --- a/tests/queries/0_stateless/02473_multistep_split_prewhere.python +++ b/tests/queries/0_stateless/02473_multistep_split_prewhere.python @@ -4,16 +4,17 @@ import os import sys CURDIR = os.path.dirname(os.path.realpath(__file__)) 
-sys.path.insert(0, os.path.join(CURDIR, 'helpers')) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) from pure_http_client import ClickHouseClient class Tester: - ''' + """ - Creates test table with multiple integer columns - Runs read queries with multiple range conditions on different columns in PREWHERE and checks that the result is correct - ''' + """ + def __init__(self, session, url, index_granularity, total_rows): self.session = session self.url = url @@ -23,10 +24,10 @@ class Tester: self.repro_queries = [] def report_error(self): - print('Repro steps:', '\n\n\t'.join(self.repro_queries)) + print("Repro steps:", "\n\n\t".join(self.repro_queries)) exit(1) - def query(self, query_text, include_in_repro_steps = True, expected_data = None): + def query(self, query_text, include_in_repro_steps=True, expected_data=None): self.repro_queries.append(query_text) resp = self.session.post(self.url, data=query_text) if resp.status_code != 200: @@ -34,98 +35,150 @@ class Tester: error = resp.text[0:40] if error not in self.reported_errors: self.reported_errors.add(error) - print('Code:', resp.status_code) - print('Result:', resp.text) + print("Code:", resp.status_code) + print("Result:", resp.text) self.report_error() result = resp.text # Check that the result is as expected - if ((not expected_data is None) and (int(result) != len(expected_data))): - print('Expected {} rows, got {}'.format(len(expected_data), result)) - print('Expected data:' + str(expected_data)) + if (not expected_data is None) and (int(result) != len(expected_data)): + print("Expected {} rows, got {}".format(len(expected_data), result)) + print("Expected data:" + str(expected_data)) self.report_error() if not include_in_repro_steps: self.repro_queries.pop() - - def check_data(self, all_data, c_range_start, c_range_end, d_range_start, d_range_end): - for to_select in ['count()', 'sum(e)']: # Test reading with and without column with default value - self.query('SELECT {} FROM tab_02473;'.format(to_select), False, all_data) + def check_data( + self, all_data, c_range_start, c_range_end, d_range_start, d_range_end + ): + for to_select in [ + "count()", + "sum(e)", + ]: # Test reading with and without column with default value + self.query("SELECT {} FROM tab_02473;".format(to_select), False, all_data) delta = 10 for b_range_start in [0, delta]: - for b_range_end in [self.total_rows - delta]: #, self.total_rows]: + for b_range_end in [self.total_rows - delta]: # , self.total_rows]: expected = all_data[ - (all_data.a == 0) & - (all_data.b > b_range_start) & - (all_data.b <= b_range_end)] - self.query('SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} WHERE a == 0;'.format( - to_select, b_range_start, b_range_end), False, expected) + (all_data.a == 0) + & (all_data.b > b_range_start) + & (all_data.b <= b_range_end) + ] + self.query( + "SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} WHERE a == 0;".format( + to_select, b_range_start, b_range_end + ), + False, + expected, + ) expected = all_data[ - (all_data.a == 0) & - (all_data.b > b_range_start) & - (all_data.b <= b_range_end) & - (all_data.c > c_range_start) & - (all_data.c <= c_range_end)] - self.query('SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} AND c > {} AND c <= {} WHERE a == 0;'.format( - to_select, b_range_start, b_range_end, c_range_start, c_range_end), False, expected) + (all_data.a == 0) + & (all_data.b > b_range_start) + & (all_data.b <= b_range_end) + & (all_data.c > c_range_start) + & (all_data.c <= c_range_end) + ] + self.query( + "SELECT {}
from tab_02473 PREWHERE b > {} AND b <= {} AND c > {} AND c <= {} WHERE a == 0;".format( + to_select, + b_range_start, + b_range_end, + c_range_start, + c_range_end, + ), + False, + expected, + ) expected = all_data[ - (all_data.a == 0) & - (all_data.b > b_range_start) & - (all_data.b <= b_range_end) & - (all_data.c > c_range_start) & - (all_data.c <= c_range_end) & - (all_data.d > d_range_start) & - (all_data.d <= d_range_end)] - self.query('SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} AND c > {} AND c <= {} AND d > {} AND d <= {} WHERE a == 0;'.format( - to_select, b_range_start, b_range_end, c_range_start, c_range_end, d_range_start, d_range_end), False, expected) - + (all_data.a == 0) + & (all_data.b > b_range_start) + & (all_data.b <= b_range_end) + & (all_data.c > c_range_start) + & (all_data.c <= c_range_end) + & (all_data.d > d_range_start) + & (all_data.d <= d_range_end) + ] + self.query( + "SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} AND c > {} AND c <= {} AND d > {} AND d <= {} WHERE a == 0;".format( + to_select, + b_range_start, + b_range_end, + c_range_start, + c_range_end, + d_range_start, + d_range_end, + ), + False, + expected, + ) def run_test(self, c_range_start, c_range_end, d_range_start, d_range_end): self.repro_queries = [] - self.query(''' + self.query( + """ CREATE TABLE tab_02473 (a Int8, b Int32, c Int32, d Int32, PRIMARY KEY (a)) ENGINE = MergeTree() ORDER BY (a, b) - SETTINGS min_bytes_for_wide_part = 0, index_granularity = {};'''.format(self.index_granularity)) + SETTINGS min_bytes_for_wide_part = 0, index_granularity = {};""".format( + self.index_granularity + ) + ) - self.query('INSERT INTO tab_02473 select 0, number+1, number+1, number+1 FROM numbers({});'.format(self.total_rows)) + self.query( + "INSERT INTO tab_02473 select 0, number+1, number+1, number+1 FROM numbers({});".format( + self.total_rows + ) + ) client = ClickHouseClient() - all_data = client.query_return_df("SELECT a, b, c, d, 1 as e FROM tab_02473 FORMAT TabSeparatedWithNames;") + all_data = client.query_return_df( + "SELECT a, b, c, d, 1 as e FROM tab_02473 FORMAT TabSeparatedWithNames;" + ) - self.query('OPTIMIZE TABLE tab_02473 FINAL SETTINGS mutations_sync=2;') + self.query("OPTIMIZE TABLE tab_02473 FINAL SETTINGS mutations_sync=2;") # After all data has been written add a column with default value - self.query('ALTER TABLE tab_02473 ADD COLUMN e Int64 DEFAULT 1;') + self.query("ALTER TABLE tab_02473 ADD COLUMN e Int64 DEFAULT 1;") - self.check_data(all_data, c_range_start, c_range_end, d_range_start, d_range_end) - - self.query('DROP TABLE tab_02473;') + self.check_data( + all_data, c_range_start, c_range_end, d_range_start, d_range_end + ) + self.query("DROP TABLE tab_02473;") def main(): # Enable multiple prewhere read steps - url = os.environ['CLICKHOUSE_URL'] + '&enable_multiple_prewhere_read_steps=1&move_all_conditions_to_prewhere=0&max_threads=1' + url = ( + os.environ["CLICKHOUSE_URL"] + + "&enable_multiple_prewhere_read_steps=1&move_all_conditions_to_prewhere=0&max_threads=1" + ) - default_index_granularity = 10; + default_index_granularity = 10 total_rows = 8 * default_index_granularity step = default_index_granularity session = requests.Session() - for index_granularity in [default_index_granularity-1, default_index_granularity]: + for index_granularity in [default_index_granularity - 1, default_index_granularity]: tester = Tester(session, url, index_granularity, total_rows) # Test combinations of ranges of columns c and d for c_range_start in range(0, 
total_rows, int(2.3 * step)): - for c_range_end in range(c_range_start + 3 * step, total_rows, int(2.1 * step)): - for d_range_start in range(int(0.5 * step), total_rows, int(2.7 * step)): - for d_range_end in range(d_range_start + 3 * step, total_rows, int(2.2 * step)): - tester.run_test(c_range_start, c_range_end, d_range_start, d_range_end) + for c_range_end in range( + c_range_start + 3 * step, total_rows, int(2.1 * step) + ): + for d_range_start in range( + int(0.5 * step), total_rows, int(2.7 * step) + ): + for d_range_end in range( + d_range_start + 3 * step, total_rows, int(2.2 * step) + ): + tester.run_test( + c_range_start, c_range_end, d_range_start, d_range_end + ) if __name__ == "__main__": main() - diff --git a/tests/queries/0_stateless/02481_async_insert_dedup.python b/tests/queries/0_stateless/02481_async_insert_dedup.python index 0cea7301ce5..1bf0edcbd05 100644 --- a/tests/queries/0_stateless/02481_async_insert_dedup.python +++ b/tests/queries/0_stateless/02481_async_insert_dedup.python @@ -8,7 +8,7 @@ import time from threading import Thread CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) from pure_http_client import ClickHouseClient @@ -16,14 +16,23 @@ client = ClickHouseClient() # test table without partition client.query("DROP TABLE IF EXISTS t_async_insert_dedup_no_part NO DELAY") -client.query(''' +client.query( + """ CREATE TABLE t_async_insert_dedup_no_part ( KeyID UInt32 ) Engine = ReplicatedMergeTree('/clickhouse/tables/{shard}/{database}/t_async_insert_dedup', '{replica}') ORDER BY (KeyID) -''') +""" +) -client.query("insert into t_async_insert_dedup_no_part values (1), (2), (3), (4), (5)", settings = {"async_insert": 1, "wait_for_async_insert": 1, "insert_keeper_fault_injection_probability": 0}) +client.query( + "insert into t_async_insert_dedup_no_part values (1), (2), (3), (4), (5)", + settings={ + "async_insert": 1, + "wait_for_async_insert": 1, + "insert_keeper_fault_injection_probability": 0, + }, +) result = client.query("select count(*) from t_async_insert_dedup_no_part") print(result, flush=True) client.query("DROP TABLE IF EXISTS t_async_insert_dedup_no_part NO DELAY") @@ -32,13 +41,13 @@ def generate_data(q, total_number): old_data = [] max_chunk_size = 30 - partitions = ['2022-11-11 10:10:10', '2022-12-12 10:10:10'] + partitions = ["2022-11-11 10:10:10", "2022-12-12 10:10:10"] last_number = 0 while True: - dup_simulate = random.randint(0,3) + dup_simulate = random.randint(0, 3) # insert old data randomly. 25% of them are duplicates. if dup_simulate == 0: - last_idx = len(old_data)-1 + last_idx = len(old_data) - 1 if last_idx < 0: continue idx = last_idx - random.randint(0, 50) @@ -53,7 +62,7 @@ def generate_data(q, total_number): end = start + chunk_size if end > total_number: end = total_number - for i in range(start, end+1): + for i in range(start, end + 1): partition = partitions[random.randint(0, 1)] insert_stmt += "('{}', {}),".format(partition, i) insert_stmt = insert_stmt[:-1] @@ -65,33 +74,46 @@ # wait until all the tasks are done.
q.join() + def fetch_and_insert_data(q, client): while True: insert = q.get() - client.query(insert, settings = {"async_insert": 1, "async_insert_deduplicate": 1, "wait_for_async_insert": 0, "async_insert_busy_timeout_ms": 1500, "insert_keeper_fault_injection_probability": 0}) + client.query( + insert, + settings={ + "async_insert": 1, + "async_insert_deduplicate": 1, + "wait_for_async_insert": 0, + "async_insert_busy_timeout_ms": 1500, + "insert_keeper_fault_injection_probability": 0, + }, + ) q.task_done() sleep_time = random.randint(50, 500) - time.sleep(sleep_time/1000.0) + time.sleep(sleep_time / 1000.0) + # main process client.query("DROP TABLE IF EXISTS t_async_insert_dedup NO DELAY") -client.query(''' +client.query( + """ CREATE TABLE t_async_insert_dedup ( EventDate DateTime, KeyID UInt32 ) Engine = ReplicatedMergeTree('/clickhouse/tables/{shard}/{database}/t_async_insert_dedup', '{replica}') PARTITION BY toYYYYMM(EventDate) ORDER BY (KeyID, EventDate) SETTINGS use_async_block_ids_cache = 1 -''') +""" +) q = queue.Queue(100) total_number = 10000 -gen = Thread(target = generate_data, args = [q, total_number]) +gen = Thread(target=generate_data, args=[q, total_number]) gen.start() for i in range(3): - insert = Thread(target = fetch_and_insert_data, args = [q, client]) + insert = Thread(target=fetch_and_insert_data, args=[q, client]) insert.start() gen.join() @@ -109,7 +131,7 @@ while True: errMsg = f"the size of result is {len(result)}. we expect {total_number}." else: for i in range(total_number): - expect = str(i+1) + expect = str(i + 1) real = result[i] if expect != real: err = True @@ -117,7 +139,7 @@ while True: break # retry several times to get stable results. if err and retry >= 5: - print (errMsg, flush=True) + print(errMsg, flush=True) elif err: retry += 1 continue @@ -125,11 +147,15 @@ while True: print(len(result), flush=True) break -result = client.query("SELECT value FROM system.metrics where metric = 'AsyncInsertCacheSize'") +result = client.query( + "SELECT value FROM system.metrics where metric = 'AsyncInsertCacheSize'" +) result = int(result.split()[0]) if result <= 0: raise Exception(f"AsyncInsertCacheSize should > 0, but got {result}") -result = client.query("SELECT value FROM system.events where event = 'AsyncInsertCacheHits'") +result = client.query( + "SELECT value FROM system.events where event = 'AsyncInsertCacheHits'" +) result = int(result.split()[0]) if result <= 0: raise Exception(f"AsyncInsertCacheHits should > 0, but got {result}") diff --git a/utils/clickhouse-diagnostics/clickhouse-diagnostics b/utils/clickhouse-diagnostics/clickhouse-diagnostics index cf65e4efbfb..5cacbf1d4d4 100755 --- a/utils/clickhouse-diagnostics/clickhouse-diagnostics +++ b/utils/clickhouse-diagnostics/clickhouse-diagnostics @@ -19,9 +19,9 @@ import tenacity import xmltodict import yaml -SELECT_VERSION = r'SELECT version()' +SELECT_VERSION = r"SELECT version()" -SELECT_UPTIME = r''' +SELECT_UPTIME = r""" {% if version_ge('21.3') -%} SELECT formatReadableTimeDelta(uptime()) {% else -%} @@ -29,18 +29,18 @@ SELECT toString(floor(uptime() / 3600 / 24)) || ' days ' || toString(floor(uptime() % (24 * 3600) / 3600, 1)) || ' hours' {% endif -%} -''' +""" SELECT_SYSTEM_TABLES = "SELECT name FROM system.tables WHERE database = 'system'" -SELECT_DATABASE_ENGINES = r'''SELECT +SELECT_DATABASE_ENGINES = r"""SELECT engine, count() "count" FROM system.databases GROUP BY engine -''' +""" -SELECT_DATABASES = r'''SELECT +SELECT_DATABASES = r"""SELECT name, engine, tables, @@ -62,17 +62,17 @@ 
LEFT JOIN ) AS db_stats ON db.name = db_stats.database ORDER BY bytes_on_disk DESC LIMIT 10 -''' +""" -SELECT_TABLE_ENGINES = r'''SELECT +SELECT_TABLE_ENGINES = r"""SELECT engine, count() "count" FROM system.tables WHERE database != 'system' GROUP BY engine -''' +""" -SELECT_DICTIONARIES = r'''SELECT +SELECT_DICTIONARIES = r"""SELECT source, type, status, @@ -80,13 +80,13 @@ SELECT_DICTIONARIES = r'''SELECT FROM system.dictionaries GROUP BY source, type, status ORDER BY status DESC, source -''' +""" SELECT_ACCESS = "SHOW ACCESS" SELECT_QUOTA_USAGE = "SHOW QUOTA" -SELECT_REPLICAS = r'''SELECT +SELECT_REPLICAS = r"""SELECT database, table, is_leader, @@ -98,9 +98,9 @@ SELECT_REPLICAS = r'''SELECT FROM system.replicas ORDER BY absolute_delay DESC LIMIT 10 -''' +""" -SELECT_REPLICATION_QUEUE = r'''SELECT +SELECT_REPLICATION_QUEUE = r"""SELECT database, table, replica_name, @@ -121,9 +121,9 @@ SELECT_REPLICATION_QUEUE = r'''SELECT FROM system.replication_queue ORDER BY create_time ASC LIMIT 20 -''' +""" -SELECT_REPLICATED_FETCHES = r'''SELECT +SELECT_REPLICATED_FETCHES = r"""SELECT database, table, round(elapsed, 1) "elapsed", @@ -140,9 +140,9 @@ SELECT_REPLICATED_FETCHES = r'''SELECT to_detached, thread_id FROM system.replicated_fetches -''' +""" -SELECT_PARTS_PER_TABLE = r'''SELECT +SELECT_PARTS_PER_TABLE = r"""SELECT database, table, count() "partitions", @@ -162,9 +162,9 @@ FROM GROUP BY database, table ORDER BY max_parts_per_partition DESC LIMIT 10 -''' +""" -SELECT_MERGES = r'''SELECT +SELECT_MERGES = r"""SELECT database, table, round(elapsed, 1) "elapsed", @@ -187,9 +187,9 @@ SELECT_MERGES = r'''SELECT formatReadableSize(memory_usage) "memory_usage" {% endif -%} FROM system.merges -''' +""" -SELECT_MUTATIONS = r'''SELECT +SELECT_MUTATIONS = r"""SELECT database, table, mutation_id, @@ -206,9 +206,9 @@ SELECT_MUTATIONS = r'''SELECT FROM system.mutations WHERE NOT is_done ORDER BY create_time DESC -''' +""" -SELECT_RECENT_DATA_PARTS = r'''SELECT +SELECT_RECENT_DATA_PARTS = r"""SELECT database, table, engine, @@ -242,9 +242,9 @@ SELECT_RECENT_DATA_PARTS = r'''SELECT FROM system.parts WHERE modification_time > now() - INTERVAL 3 MINUTE ORDER BY modification_time DESC -''' +""" -SELECT_DETACHED_DATA_PARTS = r'''SELECT +SELECT_DETACHED_DATA_PARTS = r"""SELECT database, table, partition_id, @@ -255,9 +255,9 @@ SELECT_DETACHED_DATA_PARTS = r'''SELECT max_block_number, level FROM system.detached_parts -''' +""" -SELECT_PROCESSES = r'''SELECT +SELECT_PROCESSES = r"""SELECT elapsed, query_id, {% if normalize_queries -%} @@ -285,9 +285,9 @@ SELECT_PROCESSES = r'''SELECT {% endif -%} FROM system.processes ORDER BY elapsed DESC -''' +""" -SELECT_TOP_QUERIES_BY_DURATION = r'''SELECT +SELECT_TOP_QUERIES_BY_DURATION = r"""SELECT type, query_start_time, query_duration_ms, @@ -339,9 +339,9 @@ WHERE type != 'QueryStart' AND event_time >= now() - INTERVAL 1 DAY ORDER BY query_duration_ms DESC LIMIT 10 -''' +""" -SELECT_TOP_QUERIES_BY_MEMORY_USAGE = r'''SELECT +SELECT_TOP_QUERIES_BY_MEMORY_USAGE = r"""SELECT type, query_start_time, query_duration_ms, @@ -393,9 +393,9 @@ WHERE type != 'QueryStart' AND event_time >= now() - INTERVAL 1 DAY ORDER BY memory_usage DESC LIMIT 10 -''' +""" -SELECT_FAILED_QUERIES = r'''SELECT +SELECT_FAILED_QUERIES = r"""SELECT type, query_start_time, query_duration_ms, @@ -448,9 +448,9 @@ WHERE type != 'QueryStart' AND exception != '' ORDER BY query_start_time DESC LIMIT 10 -''' +""" -SELECT_STACK_TRACES = r'''SELECT +SELECT_STACK_TRACES = r"""SELECT '\n' || arrayStringConcat( 
arrayMap( x, @@ -459,9 +459,9 @@ SELECT_STACK_TRACES = r'''SELECT arrayMap(x -> demangle(addressToSymbol(x)), trace)), '\n') AS trace FROM system.stack_trace -''' +""" -SELECT_CRASH_LOG = r'''SELECT +SELECT_CRASH_LOG = r"""SELECT event_time, signal, thread_id, @@ -470,7 +470,7 @@ SELECT_CRASH_LOG = r'''SELECT version FROM system.crash_log ORDER BY event_time DESC -''' +""" def retry(exception_types, max_attempts=5, max_interval=5): @@ -481,7 +481,8 @@ def retry(exception_types, max_attempts=5, max_interval=5): retry=tenacity.retry_if_exception_type(exception_types), wait=tenacity.wait_random_exponential(multiplier=0.5, max=max_interval), stop=tenacity.stop_after_attempt(max_attempts), - reraise=True) + reraise=True, + ) class ClickhouseError(Exception): @@ -502,9 +503,9 @@ class ClickhouseClient: def __init__(self, *, host="localhost", port=8123, user="default", password): self._session = requests.Session() if user: - self._session.headers['X-ClickHouse-User'] = user - self._session.headers['X-ClickHouse-Key'] = password - self._url = f'http://{host}:{port}' + self._session.headers["X-ClickHouse-User"] = user + self._session.headers["X-ClickHouse-Key"] = password + self._url = f"http://{host}:{port}" self._timeout = 60 self._ch_version = None @@ -516,7 +517,16 @@ class ClickhouseClient: return self._ch_version @retry(requests.exceptions.ConnectionError) - def query(self, query, query_args=None, format=None, post_data=None, timeout=None, echo=False, dry_run=False): + def query( + self, + query, + query_args=None, + format=None, + post_data=None, + timeout=None, + echo=False, + dry_run=False, + ): """ Execute query. """ @@ -524,28 +534,30 @@ class ClickhouseClient: query = self.render_query(query, **query_args) if format: - query += f' FORMAT {format}' + query += f" FORMAT {format}" if timeout is None: timeout = self._timeout if echo: - print(sqlparse.format(query, reindent=True), '\n') + print(sqlparse.format(query, reindent=True), "\n") if dry_run: return None try: - response = self._session.post(self._url, - params={ - 'query': query, - }, - json=post_data, - timeout=timeout) + response = self._session.post( + self._url, + params={ + "query": query, + }, + json=post_data, + timeout=timeout, + ) response.raise_for_status() - if format in ('JSON', 'JSONCompact'): + if format in ("JSON", "JSONCompact"): return response.json() return response.text.strip() @@ -555,7 +567,9 @@ class ClickhouseClient: def render_query(self, query, **kwargs): env = jinja2.Environment() - env.globals['version_ge'] = lambda version: version_ge(self.clickhouse_version, version) + env.globals["version_ge"] = lambda version: version_ge( + self.clickhouse_version, version + ) template = env.from_string(query) return template.render(kwargs) @@ -578,11 +592,13 @@ class ClickhouseConfig: @classmethod def load(cls): - return ClickhouseConfig(cls._load_config('/var/lib/clickhouse/preprocessed_configs/config.xml')) + return ClickhouseConfig( + cls._load_config("/var/lib/clickhouse/preprocessed_configs/config.xml") + ) @staticmethod def _load_config(config_path): - with open(config_path, 'r') as file: + with open(config_path, "r") as file: return xmltodict.parse(file.read()) @classmethod @@ -591,8 +607,8 @@ class ClickhouseConfig: for key, value in list(config.items()): if isinstance(value, MutableMapping): cls._mask_secrets(config[key]) - elif key in ('password', 'secret_access_key', 'header', 'identity'): - config[key] = '*****' + elif key in ("password", "secret_access_key", "header", "identity"): + config[key] = "*****" 
class DiagnosticsData: @@ -603,53 +619,53 @@ class DiagnosticsData: def __init__(self, args): self.args = args self.host = args.host - self._sections = [{'section': None, 'data': {}}] + self._sections = [{"section": None, "data": {}}] def add_string(self, name, value, section=None): self._section(section)[name] = { - 'type': 'string', - 'value': value, + "type": "string", + "value": value, } def add_xml_document(self, name, document, section=None): self._section(section)[name] = { - 'type': 'xml', - 'value': document, + "type": "xml", + "value": document, } def add_query(self, name, query, result, section=None): self._section(section)[name] = { - 'type': 'query', - 'query': query, - 'result': result, + "type": "query", + "query": query, + "result": result, } def add_command(self, name, command, result, section=None): self._section(section)[name] = { - 'type': 'command', - 'command': command, - 'result': result, + "type": "command", + "command": command, + "result": result, } def dump(self, format): - if format.startswith('json'): + if format.startswith("json"): result = self._dump_json() - elif format.startswith('yaml'): + elif format.startswith("yaml"): result = self._dump_yaml() else: result = self._dump_wiki() - if format.endswith('.gz'): - compressor = gzip.GzipFile(mode='wb', fileobj=sys.stdout.buffer) + if format.endswith(".gz"): + compressor = gzip.GzipFile(mode="wb", fileobj=sys.stdout.buffer) compressor.write(result.encode()) else: print(result) def _section(self, name=None): - if self._sections[-1]['section'] != name: - self._sections.append({'section': name, 'data': {}}) + if self._sections[-1]["section"] != name: + self._sections.append({"section": name, "data": {}}) - return self._sections[-1]['data'] + return self._sections[-1]["data"] def _dump_json(self): """ @@ -669,85 +685,85 @@ class DiagnosticsData: """ def _write_title(buffer, value): - buffer.write(f'### {value}\n') + buffer.write(f"### {value}\n") def _write_subtitle(buffer, value): - buffer.write(f'#### {value}\n') + buffer.write(f"#### {value}\n") def _write_string_item(buffer, name, item): - value = item['value'] - if value != '': - value = f'**{value}**' - buffer.write(f'{name}: {value}\n') + value = item["value"] + if value != "": + value = f"**{value}**" + buffer.write(f"{name}: {value}\n") def _write_xml_item(buffer, section_name, name, item): if section_name: - buffer.write(f'##### {name}\n') + buffer.write(f"##### {name}\n") else: _write_subtitle(buffer, name) - _write_result(buffer, item['value'], format='XML') + _write_result(buffer, item["value"], format="XML") def _write_query_item(buffer, section_name, name, item): if section_name: - buffer.write(f'##### {name}\n') + buffer.write(f"##### {name}\n") else: _write_subtitle(buffer, name) - _write_query(buffer, item['query']) - _write_result(buffer, item['result']) + _write_query(buffer, item["query"]) + _write_result(buffer, item["result"]) def _write_command_item(buffer, section_name, name, item): if section_name: - buffer.write(f'##### {name}\n') + buffer.write(f"##### {name}\n") else: _write_subtitle(buffer, name) - _write_command(buffer, item['command']) - _write_result(buffer, item['result']) + _write_command(buffer, item["command"]) + _write_result(buffer, item["result"]) def _write_unknown_item(buffer, section_name, name, item): if section_name: - buffer.write(f'**{name}**\n') + buffer.write(f"**{name}**\n") else: _write_subtitle(buffer, name) json.dump(item, buffer, indent=2) def _write_query(buffer, query): - buffer.write('**query**\n') - 
buffer.write('```sql\n') + buffer.write("**query**\n") + buffer.write("```sql\n") buffer.write(query) - buffer.write('\n```\n') + buffer.write("\n```\n") def _write_command(buffer, command): - buffer.write('**command**\n') - buffer.write('```\n') + buffer.write("**command**\n") + buffer.write("```\n") buffer.write(command) - buffer.write('\n```\n') + buffer.write("\n```\n") def _write_result(buffer, result, format=None): - buffer.write('**result**\n') - buffer.write(f'```{format}\n' if format else '```\n') + buffer.write("**result**\n") + buffer.write(f"```{format}\n" if format else "```\n") buffer.write(result) - buffer.write('\n```\n') + buffer.write("\n```\n") buffer = io.StringIO() - _write_title(buffer, f'Diagnostics data for host {self.host}') + _write_title(buffer, f"Diagnostics data for host {self.host}") for section in self._sections: - section_name = section['section'] + section_name = section["section"] if section_name: _write_subtitle(buffer, section_name) - for name, item in section['data'].items(): - if item['type'] == 'string': + for name, item in section["data"].items(): + if item["type"] == "string": _write_string_item(buffer, name, item) - elif item['type'] == 'query': + elif item["type"] == "query": _write_query_item(buffer, section_name, name, item) - elif item['type'] == 'command': + elif item["type"] == "command": _write_command_item(buffer, section_name, name, item) - elif item['type'] == 'xml': + elif item["type"] == "xml": _write_xml_item(buffer, section_name, name, item) else: _write_unknown_item(buffer, section_name, name, item) @@ -760,126 +776,196 @@ def main(): Program entry point. """ args = parse_args() - timestamp = datetime.strftime(datetime.now(), '%Y-%m-%d %H:%M:%S') - client = ClickhouseClient(host=args.host, port=args.port, user=args.user, password=args.password) + timestamp = datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S") + client = ClickhouseClient( + host=args.host, port=args.port, user=args.user, password=args.password + ) ch_config = ClickhouseConfig.load() version = client.clickhouse_version - system_tables = [row[0] for row in execute_query(client, SELECT_SYSTEM_TABLES, format='JSONCompact')['data']] + system_tables = [ + row[0] + for row in execute_query(client, SELECT_SYSTEM_TABLES, format="JSONCompact")[ + "data" + ] + ] diagnostics = DiagnosticsData(args) - diagnostics.add_string('Version', version) - diagnostics.add_string('Timestamp', timestamp) - diagnostics.add_string('Uptime', execute_query(client, SELECT_UPTIME)) + diagnostics.add_string("Version", version) + diagnostics.add_string("Timestamp", timestamp) + diagnostics.add_string("Uptime", execute_query(client, SELECT_UPTIME)) - diagnostics.add_xml_document('ClickHouse configuration', ch_config.dump()) + diagnostics.add_xml_document("ClickHouse configuration", ch_config.dump()) - if version_ge(version, '20.8'): - add_query(diagnostics, 'Access configuration', - client=client, - query=SELECT_ACCESS, - format='TSVRaw') - add_query(diagnostics, 'Quotas', - client=client, - query=SELECT_QUOTA_USAGE, - format='Vertical') + if version_ge(version, "20.8"): + add_query( + diagnostics, + "Access configuration", + client=client, + query=SELECT_ACCESS, + format="TSVRaw", + ) + add_query( + diagnostics, + "Quotas", + client=client, + query=SELECT_QUOTA_USAGE, + format="Vertical", + ) - add_query(diagnostics, 'Database engines', - client=client, - query=SELECT_DATABASE_ENGINES, - format='PrettyCompactNoEscapes', - section='Schema') - add_query(diagnostics, 'Databases (top 10 by size)', - 
client=client, - query=SELECT_DATABASES, - format='PrettyCompactNoEscapes', - section='Schema') - add_query(diagnostics, 'Table engines', - client=client, - query=SELECT_TABLE_ENGINES, - format='PrettyCompactNoEscapes', - section='Schema') - add_query(diagnostics, 'Dictionaries', - client=client, - query=SELECT_DICTIONARIES, - format='PrettyCompactNoEscapes', - section='Schema') + add_query( + diagnostics, + "Database engines", + client=client, + query=SELECT_DATABASE_ENGINES, + format="PrettyCompactNoEscapes", + section="Schema", + ) + add_query( + diagnostics, + "Databases (top 10 by size)", + client=client, + query=SELECT_DATABASES, + format="PrettyCompactNoEscapes", + section="Schema", + ) + add_query( + diagnostics, + "Table engines", + client=client, + query=SELECT_TABLE_ENGINES, + format="PrettyCompactNoEscapes", + section="Schema", + ) + add_query( + diagnostics, + "Dictionaries", + client=client, + query=SELECT_DICTIONARIES, + format="PrettyCompactNoEscapes", + section="Schema", + ) - add_query(diagnostics, 'Replicated tables (top 10 by absolute delay)', - client=client, - query=SELECT_REPLICAS, - format='PrettyCompactNoEscapes', - section='Replication') - add_query(diagnostics, 'Replication queue (top 20 oldest tasks)', - client=client, - query=SELECT_REPLICATION_QUEUE, - format='Vertical', - section='Replication') - if version_ge(version, '21.3'): - add_query(diagnostics, 'Replicated fetches', - client=client, - query=SELECT_REPLICATED_FETCHES, - format='Vertical', - section='Replication') + add_query( + diagnostics, + "Replicated tables (top 10 by absolute delay)", + client=client, + query=SELECT_REPLICAS, + format="PrettyCompactNoEscapes", + section="Replication", + ) + add_query( + diagnostics, + "Replication queue (top 20 oldest tasks)", + client=client, + query=SELECT_REPLICATION_QUEUE, + format="Vertical", + section="Replication", + ) + if version_ge(version, "21.3"): + add_query( + diagnostics, + "Replicated fetches", + client=client, + query=SELECT_REPLICATED_FETCHES, + format="Vertical", + section="Replication", + ) - add_query(diagnostics, 'Top 10 tables by max parts per partition', - client=client, - query=SELECT_PARTS_PER_TABLE, - format='PrettyCompactNoEscapes') - add_query(diagnostics, 'Merges in progress', - client=client, - query=SELECT_MERGES, - format='Vertical') - add_query(diagnostics, 'Mutations in progress', - client=client, - query=SELECT_MUTATIONS, - format='Vertical') - add_query(diagnostics, 'Recent data parts (modification time within last 3 minutes)', - client=client, - query=SELECT_RECENT_DATA_PARTS, - format='Vertical') + add_query( + diagnostics, + "Top 10 tables by max parts per partition", + client=client, + query=SELECT_PARTS_PER_TABLE, + format="PrettyCompactNoEscapes", + ) + add_query( + diagnostics, + "Merges in progress", + client=client, + query=SELECT_MERGES, + format="Vertical", + ) + add_query( + diagnostics, + "Mutations in progress", + client=client, + query=SELECT_MUTATIONS, + format="Vertical", + ) + add_query( + diagnostics, + "Recent data parts (modification time within last 3 minutes)", + client=client, + query=SELECT_RECENT_DATA_PARTS, + format="Vertical", + ) - add_query(diagnostics, 'system.detached_parts', - client=client, - query=SELECT_DETACHED_DATA_PARTS, - format='PrettyCompactNoEscapes', - section='Detached data') - add_command(diagnostics, 'Disk space usage', - command='du -sh -L -c /var/lib/clickhouse/data/*/*/detached/* | sort -rsh', - section='Detached data') + add_query( + diagnostics, + "system.detached_parts", + 
client=client, + query=SELECT_DETACHED_DATA_PARTS, + format="PrettyCompactNoEscapes", + section="Detached data", + ) + add_command( + diagnostics, + "Disk space usage", + command="du -sh -L -c /var/lib/clickhouse/data/*/*/detached/* | sort -rsh", + section="Detached data", + ) - add_query(diagnostics, 'Queries in progress (process list)', - client=client, - query=SELECT_PROCESSES, - format='Vertical', - section='Queries') - add_query(diagnostics, 'Top 10 queries by duration', - client=client, - query=SELECT_TOP_QUERIES_BY_DURATION, - format='Vertical', - section='Queries') - add_query(diagnostics, 'Top 10 queries by memory usage', - client=client, - query=SELECT_TOP_QUERIES_BY_MEMORY_USAGE, - format='Vertical', - section='Queries') - add_query(diagnostics, 'Last 10 failed queries', - client=client, - query=SELECT_FAILED_QUERIES, - format='Vertical', - section='Queries') + add_query( + diagnostics, + "Queries in progress (process list)", + client=client, + query=SELECT_PROCESSES, + format="Vertical", + section="Queries", + ) + add_query( + diagnostics, + "Top 10 queries by duration", + client=client, + query=SELECT_TOP_QUERIES_BY_DURATION, + format="Vertical", + section="Queries", + ) + add_query( + diagnostics, + "Top 10 queries by memory usage", + client=client, + query=SELECT_TOP_QUERIES_BY_MEMORY_USAGE, + format="Vertical", + section="Queries", + ) + add_query( + diagnostics, + "Last 10 failed queries", + client=client, + query=SELECT_FAILED_QUERIES, + format="Vertical", + section="Queries", + ) - add_query(diagnostics, 'Stack traces', - client=client, - query=SELECT_STACK_TRACES, - format='Vertical') + add_query( + diagnostics, + "Stack traces", + client=client, + query=SELECT_STACK_TRACES, + format="Vertical", + ) - if 'crash_log' in system_tables: - add_query(diagnostics, 'Crash log', - client=client, - query=SELECT_CRASH_LOG, - format='Vertical') + if "crash_log" in system_tables: + add_query( + diagnostics, + "Crash log", + client=client, + query=SELECT_CRASH_LOG, + format="Vertical", + ) - add_command(diagnostics, 'uname', 'uname -a') + add_command(diagnostics, "uname", "uname -a") diagnostics.dump(args.format) @@ -889,29 +975,34 @@ def parse_args(): Parse command-line arguments. 
""" parser = argparse.ArgumentParser() - parser.add_argument('--format', - choices=['json', 'yaml', 'json.gz', 'yaml.gz', 'wiki', 'wiki.gz'], - default='wiki') - parser.add_argument('--normalize-queries', - action='store_true', - default=False) - parser.add_argument('--host', dest="host", help="clickhouse host") - parser.add_argument('--port', dest="port", default=8123, help="clickhouse http port") - parser.add_argument('--user', dest="user", default="default", help="clickhouse user") - parser.add_argument('--password', dest="password", help="clickhouse password") + parser.add_argument( + "--format", + choices=["json", "yaml", "json.gz", "yaml.gz", "wiki", "wiki.gz"], + default="wiki", + ) + parser.add_argument("--normalize-queries", action="store_true", default=False) + parser.add_argument("--host", dest="host", help="clickhouse host") + parser.add_argument( + "--port", dest="port", default=8123, help="clickhouse http port" + ) + parser.add_argument( + "--user", dest="user", default="default", help="clickhouse user" + ) + parser.add_argument("--password", dest="password", help="clickhouse password") return parser.parse_args() def add_query(diagnostics, name, client, query, format, section=None): query_args = { - 'normalize_queries': diagnostics.args.normalize_queries, + "normalize_queries": diagnostics.args.normalize_queries, } query = client.render_query(query, **query_args) diagnostics.add_query( name=name, query=query, result=execute_query(client, query, render_query=False, format=format), - section=section) + section=section, + ) def execute_query(client, query, render_query=True, format=None): @@ -926,14 +1017,18 @@ def execute_query(client, query, render_query=True, format=None): def add_command(diagnostics, name, command, section=None): diagnostics.add_command( - name=name, - command=command, - result=execute_command(command), - section=section) + name=name, command=command, result=execute_command(command), section=section + ) def execute_command(command, input=None): - proc = subprocess.Popen(command, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + proc = subprocess.Popen( + command, + shell=True, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) if isinstance(input, str): input = input.encode() @@ -941,7 +1036,7 @@ def execute_command(command, input=None): stdout, stderr = proc.communicate(input=input) if proc.returncode: - return f'failed with exit code {proc.returncode}\n{stderr.decode()}' + return f"failed with exit code {proc.returncode}\n{stderr.decode()}" return stdout.decode() @@ -957,8 +1052,8 @@ def parse_version(version): """ Parse version string. 
""" - return [int(x) for x in version.strip().split('.') if x.isnumeric()] + return [int(x) for x in version.strip().split(".") if x.isnumeric()] -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/utils/s3tools/s3uploader b/utils/s3tools/s3uploader index 33db76f57f4..4855bdb3f96 100755 --- a/utils/s3tools/s3uploader +++ b/utils/s3tools/s3uploader @@ -28,39 +28,48 @@ class S3API(object): bucket = self.connection.get_bucket(bucket_name) key = bucket.initiate_multipart_upload(s3_path) logging.info("Will upload to s3 path %s", s3_path) - chunksize = 1024 * 1024 * 1024 # 1 GB + chunksize = 1024 * 1024 * 1024 # 1 GB filesize = os.stat(file_path).st_size logging.info("File size is %s", filesize) chunkcount = int(math.ceil(filesize / chunksize)) def call_back(x, y): print("Uploaded {}/{} bytes".format(x, y)) + try: for i in range(chunkcount + 1): logging.info("Uploading chunk %s of %s", i, chunkcount + 1) offset = chunksize * i bytes_size = min(chunksize, filesize - offset) - with open(file_path, 'r') as fp: + with open(file_path, "r") as fp: fp.seek(offset) - key.upload_part_from_file(fp=fp, part_num=i+1, - size=bytes_size, cb=call_back, - num_cb=100) + key.upload_part_from_file( + fp=fp, part_num=i + 1, size=bytes_size, cb=call_back, num_cb=100 + ) key.complete_upload() except Exception as ex: key.cancel_upload() raise ex logging.info("Contents were set") return "https://{bucket}.{mds_url}/{path}".format( - bucket=bucket_name, mds_url=self.mds_url, path=s3_path) + bucket=bucket_name, mds_url=self.mds_url, path=s3_path + ) def set_file_contents(self, bucket, local_file_path, s3_file_path): key = Key(bucket) key.key = s3_file_path file_size = os.stat(local_file_path).st_size - logging.info("Uploading file `%s` to `%s`. Size is %s", local_file_path, s3_file_path, file_size) + logging.info( + "Uploading file `%s` to `%s`. 
Size is %s", + local_file_path, + s3_file_path, + file_size, + ) + def call_back(x, y): print("Uploaded {}/{} bytes".format(x, y)) + key.set_contents_from_filename(local_file_path, cb=call_back) def upload_data_for_static_files_disk(self, bucket_name, directory_path, s3_path): @@ -74,12 +83,14 @@ class S3API(object): path = root.split(os.sep) for file in files: local_file_path = os.path.join(root, file) - s3_file = local_file_path[len(directory_path) + 1:] + s3_file = local_file_path[len(directory_path) + 1 :] s3_file_path = os.path.join(s3_path, s3_file) self.set_file_contents(bucket, local_file_path, s3_file_path) logging.info("Uploading finished") - return "https://{bucket}.{mds_url}/{path}".format(bucket=bucket_name, mds_url=self.mds_url, path=s3_path) + return "https://{bucket}.{mds_url}/{path}".format( + bucket=bucket_name, mds_url=self.mds_url, path=s3_path + ) def list_bucket_keys(self, bucket_name): bucket = self.connection.get_bucket(bucket_name) @@ -91,100 +102,121 @@ class S3API(object): bucket.get_all_keys() for obj in bucket.get_all_keys(): if obj.key.startswith(folder_path): - print('Removing ' + obj.key) + print("Removing " + obj.key) obj.delete() -def make_tar_file_for_table(clickhouse_data_path, db_name, table_name, - tmp_prefix): +def make_tar_file_for_table(clickhouse_data_path, db_name, table_name, tmp_prefix): - relative_data_path = os.path.join('data', db_name, table_name) - relative_meta_path = os.path.join('metadata', db_name, table_name + '.sql') + relative_data_path = os.path.join("data", db_name, table_name) + relative_meta_path = os.path.join("metadata", db_name, table_name + ".sql") path_to_data = os.path.join(clickhouse_data_path, relative_data_path) path_to_metadata = os.path.join(clickhouse_data_path, relative_meta_path) - temporary_file_name = tmp_prefix + '/{tname}.tar'.format(tname=table_name) + temporary_file_name = tmp_prefix + "/{tname}.tar".format(tname=table_name) with tarfile.open(temporary_file_name, "w") as bundle: bundle.add(path_to_data, arcname=relative_data_path) bundle.add(path_to_metadata, arcname=relative_meta_path) return temporary_file_name -USAGE_EXAMPLES = ''' +USAGE_EXAMPLES = """ examples: \t./s3uploader --dataset-name some_ds --access-key-id XXX --secret-access-key YYY --clickhouse-data-path /opt/clickhouse/ --table-name default.some_tbl --bucket-name some-bucket \t./s3uploader --dataset-name some_ds --access-key-id XXX --secret-access-key YYY --file-path some_ds.tsv.xz --bucket-name some-bucket --s3-path /path/to/ -''' +""" if __name__ == "__main__": - logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') + logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") parser = argparse.ArgumentParser( description="Simple tool for uploading datasets to clickhouse S3", - usage='%(prog)s [options] {}'.format(USAGE_EXAMPLES)) - parser.add_argument('--s3-api-url', default='s3.amazonaws.com') - parser.add_argument('--s3-common-url', default='s3.amazonaws.com') - parser.add_argument('--bucket-name', default='clickhouse-datasets') - parser.add_argument('--dataset-name', required=True, - help='Name of dataset, will be used in uploaded path') - parser.add_argument('--access-key-id', required=True) - parser.add_argument('--secret-access-key', required=True) - parser.add_argument('--clickhouse-data-path', - default='/var/lib/clickhouse/', - help='Path to clickhouse database on filesystem') - parser.add_argument('--s3-path', help='Path in s3, where to upload file') - parser.add_argument('--tmp-prefix', 
@@ -74,12 +83,14 @@ class S3API(object):
             path = root.split(os.sep)
             for file in files:
                 local_file_path = os.path.join(root, file)
-                s3_file = local_file_path[len(directory_path) + 1:]
+                s3_file = local_file_path[len(directory_path) + 1 :]
                 s3_file_path = os.path.join(s3_path, s3_file)
                 self.set_file_contents(bucket, local_file_path, s3_file_path)
         logging.info("Uploading finished")
-        return "https://{bucket}.{mds_url}/{path}".format(bucket=bucket_name, mds_url=self.mds_url, path=s3_path)
+        return "https://{bucket}.{mds_url}/{path}".format(
+            bucket=bucket_name, mds_url=self.mds_url, path=s3_path
+        )
 
     def list_bucket_keys(self, bucket_name):
         bucket = self.connection.get_bucket(bucket_name)
@@ -91,100 +102,121 @@ class S3API(object):
         bucket.get_all_keys()
         for obj in bucket.get_all_keys():
             if obj.key.startswith(folder_path):
-                print('Removing ' + obj.key)
+                print("Removing " + obj.key)
                 obj.delete()
 
 
-def make_tar_file_for_table(clickhouse_data_path, db_name, table_name,
-                            tmp_prefix):
+def make_tar_file_for_table(clickhouse_data_path, db_name, table_name, tmp_prefix):
 
-    relative_data_path = os.path.join('data', db_name, table_name)
-    relative_meta_path = os.path.join('metadata', db_name, table_name + '.sql')
+    relative_data_path = os.path.join("data", db_name, table_name)
+    relative_meta_path = os.path.join("metadata", db_name, table_name + ".sql")
     path_to_data = os.path.join(clickhouse_data_path, relative_data_path)
     path_to_metadata = os.path.join(clickhouse_data_path, relative_meta_path)
-    temporary_file_name = tmp_prefix + '/{tname}.tar'.format(tname=table_name)
+    temporary_file_name = tmp_prefix + "/{tname}.tar".format(tname=table_name)
     with tarfile.open(temporary_file_name, "w") as bundle:
         bundle.add(path_to_data, arcname=relative_data_path)
         bundle.add(path_to_metadata, arcname=relative_meta_path)
     return temporary_file_name
 
 
-USAGE_EXAMPLES = '''
+USAGE_EXAMPLES = """
 examples:
 \t./s3uploader --dataset-name some_ds --access-key-id XXX --secret-access-key YYY --clickhouse-data-path /opt/clickhouse/ --table-name default.some_tbl --bucket-name some-bucket
 \t./s3uploader --dataset-name some_ds --access-key-id XXX --secret-access-key YYY --file-path some_ds.tsv.xz --bucket-name some-bucket --s3-path /path/to/
-'''
+"""
 
 if __name__ == "__main__":
-    logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
+    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
     parser = argparse.ArgumentParser(
         description="Simple tool for uploading datasets to clickhouse S3",
-        usage='%(prog)s [options] {}'.format(USAGE_EXAMPLES))
-    parser.add_argument('--s3-api-url', default='s3.amazonaws.com')
-    parser.add_argument('--s3-common-url', default='s3.amazonaws.com')
-    parser.add_argument('--bucket-name', default='clickhouse-datasets')
-    parser.add_argument('--dataset-name', required=True,
-                        help='Name of dataset, will be used in uploaded path')
-    parser.add_argument('--access-key-id', required=True)
-    parser.add_argument('--secret-access-key', required=True)
-    parser.add_argument('--clickhouse-data-path',
-                        default='/var/lib/clickhouse/',
-                        help='Path to clickhouse database on filesystem')
-    parser.add_argument('--s3-path', help='Path in s3, where to upload file')
-    parser.add_argument('--tmp-prefix', default='/tmp',
-                        help='Prefix to store temporary downloaded file')
+        usage="%(prog)s [options] {}".format(USAGE_EXAMPLES),
+    )
+    parser.add_argument("--s3-api-url", default="s3.amazonaws.com")
+    parser.add_argument("--s3-common-url", default="s3.amazonaws.com")
+    parser.add_argument("--bucket-name", default="clickhouse-datasets")
+    parser.add_argument(
+        "--dataset-name",
+        required=True,
+        help="Name of dataset, will be used in uploaded path",
+    )
+    parser.add_argument("--access-key-id", required=True)
+    parser.add_argument("--secret-access-key", required=True)
+    parser.add_argument(
+        "--clickhouse-data-path",
+        default="/var/lib/clickhouse/",
+        help="Path to clickhouse database on filesystem",
+    )
+    parser.add_argument("--s3-path", help="Path in s3, where to upload file")
+    parser.add_argument(
+        "--tmp-prefix", default="/tmp", help="Prefix to store temporary downloaded file"
+    )
     data_group = parser.add_mutually_exclusive_group(required=True)
-    table_name_argument = data_group.add_argument('--table-name',
-                                                  help='Name of table with database, if you are uploading partitions')
-    data_group.add_argument('--file-path',
-                            help='Name of file, if you are uploading')
-    data_group.add_argument('--directory-path', help='Path to directory with files to upload')
-    data_group.add_argument('--list-directory', help='List s3 directory by --directory-path')
-    data_group.add_argument('--remove-directory', help='Remove s3 directory by --directory-path')
+    table_name_argument = data_group.add_argument(
+        "--table-name",
+        help="Name of table with database, if you are uploading partitions",
+    )
+    data_group.add_argument("--file-path", help="Name of file, if you are uploading")
+    data_group.add_argument(
+        "--directory-path", help="Path to directory with files to upload"
+    )
+    data_group.add_argument(
+        "--list-directory", help="List s3 directory by --directory-path"
+    )
+    data_group.add_argument(
+        "--remove-directory", help="Remove s3 directory by --directory-path"
+    )
     args = parser.parse_args()
 
     if args.table_name is not None and args.clickhouse_data_path is None:
-        raise argparse.ArgumentError(table_name_argument,
-                                     "You should specify --clickhouse-data-path to upload --table")
+        raise argparse.ArgumentError(
+            table_name_argument,
+            "You should specify --clickhouse-data-path to upload --table",
+        )
 
     s3_conn = S3API(
-        args.access_key_id, args.secret_access_key,
-        args.s3_api_url, args.s3_common_url)
+        args.access_key_id, args.secret_access_key, args.s3_api_url, args.s3_common_url
+    )
 
-    file_path = ''
+    file_path = ""
     directory_path = args.directory_path
     s3_path = args.s3_path
 
     if args.list_directory:
         s3_conn.list_bucket_keys(args.bucket_name)
     elif args.remove_directory:
-        print('Removing s3 path: ' + args.remove_directory)
+        print("Removing s3 path: " + args.remove_directory)
         s3_conn.remove_folder_from_bucket(args.bucket_name, args.remove_directory)
     elif args.directory_path is not None:
-        url = s3_conn.upload_data_for_static_files_disk(args.bucket_name, directory_path, s3_path)
+        url = s3_conn.upload_data_for_static_files_disk(
+            args.bucket_name, directory_path, s3_path
+        )
         logging.info("Data uploaded: %s", url)
     else:
         if args.table_name is not None:
-            if '.' not in args.table_name:
-                db_name = 'default'
+            if "." not in args.table_name:
+                db_name = "default"
             else:
-                db_name, table_name = args.table_name.split('.')
+                db_name, table_name = args.table_name.split(".")
             file_path = make_tar_file_for_table(
-                args.clickhouse_data_path, db_name, table_name, args.tmp_prefix)
+                args.clickhouse_data_path, db_name, table_name, args.tmp_prefix
+            )
         else:
             file_path = args.file_path
 
-        if 'tsv' in file_path:
+        if "tsv" in file_path:
             s3_path = os.path.join(
-                args.dataset_name, 'tsv', os.path.basename(file_path))
+                args.dataset_name, "tsv", os.path.basename(file_path)
+            )
         if args.table_name is not None:
             s3_path = os.path.join(
-                args.dataset_name, 'partitions', os.path.basename(file_path))
+                args.dataset_name, "partitions", os.path.basename(file_path)
+            )
         elif args.s3_path is not None:
             s3_path = os.path.join(
-                args.dataset_name, args.s3_path, os.path.basename(file_path))
+                args.dataset_name, args.s3_path, os.path.basename(file_path)
+            )
         else:
             raise Exception("Don't know s3-path to upload")
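One pre-existing wart survives the reformatting: in the --table-name branch, when the name contains no dot only db_name is assigned, so the subsequent make_tar_file_for_table(..., table_name, ...) call raises NameError. A sketch of what the branch presumably intends (not part of this patch, which deliberately keeps the old logic):

    # Sketch only; the missing assignment is the point of interest.
    if "." not in args.table_name:
        db_name = "default"
        table_name = args.table_name  # absent in the script: NameError later
    else:
        db_name, table_name = args.table_name.split(".")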
diff --git a/utils/test_history/test-history b/utils/test_history/test-history
index fdd6c36e9dc..5f031af1d3a 100755
--- a/utils/test_history/test-history
+++ b/utils/test_history/test-history
@@ -11,13 +11,14 @@
 from termcolor import colored
 import sys
 
 COLORMAP = {
-    "success": colored("success", 'green'),
-    "failure": colored("failure", 'red'),
-    "error": colored("error", 'red'),
-    "pending": colored("pending", 'yellow'),
-    "not run": colored("not run", 'white'),
+    "success": colored("success", "green"),
+    "failure": colored("failure", "red"),
+    "error": colored("error", "red"),
+    "pending": colored("pending", "yellow"),
+    "not run": colored("not run", "white"),
 }
 
+
 def _filter_statuses(statuses):
     """
     Squash statuses to latest state
@@ -69,7 +70,7 @@ if __name__ == "__main__":
     date_since = datetime.datetime.strptime(args.since, "%Y-%m-%d %H:%M:%S")
 
     gh = Github(args.token)
-    repo = gh.get_repo('ClickHouse/ClickHouse')
+    repo = gh.get_repo("ClickHouse/ClickHouse")
     commits = get_commits(repo, date_since)
 
     longest_header = []
@@ -101,6 +102,6 @@ if __name__ == "__main__":
         result_data.append(current_result)
 
     if sys.stdout.isatty():
-        longest_header = [colored(h, 'white', attrs=['bold']) for h in longest_header]
+        longest_header = [colored(h, "white", attrs=["bold"]) for h in longest_header]
 
     print(tabulate.tabulate(result_data, headers=longest_header, tablefmt="grid"))
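_filter_statuses contributes only its docstring to this diff. For readers, a hedged sketch of what "squash statuses to latest state" conventionally means for PyGithub commit statuses (the real body may differ; context and updated_at are standard CommitStatus attributes):

    # Hypothetical sketch of _filter_statuses; the actual implementation is
    # not shown in this hunk. Keeps only the newest status per context.
    def _filter_statuses(statuses):
        filtered = {}
        for status in statuses:
            prev = filtered.get(status.context)
            if prev is None or status.updated_at > prev.updated_at:
                filtered[status.context] = status
        return filtered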