commit 0ee8dfad53 (parent 21f5d20b9e)

    apply black formatter
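The change is mechanical throughout: black normalizes string literals to double quotes, re-wraps any statement longer than its 88-column default, explodes long argument lists one-per-line with trailing commas, and separates top-level definitions with two blank lines. A minimal sketch of reproducing one rewrite from the first hunk through black's Python API (format_str and Mode are black's documented entry points; the snippet assumes black is pip-installed):

    # Sketch: reproduce the kind of rewrite this commit applies, via black's API.
    # Assumes `pip install black`; format_str/Mode are black's documented entry points.
    import black

    src = "DEFAULT_URL = 'https://clickhouse-datasets.s3.amazonaws.com'\n"

    # Mode() defaults to black's standard 88-character line length.
    formatted = black.format_str(src, mode=black.Mode())
    print(formatted)  # DEFAULT_URL = "https://clickhouse-datasets.s3.amazonaws.com"

Running black over a directory (for example `black tests/`) applies the same rewrites in place; the hunks below are exactly that output.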
@@ -10,31 +10,38 @@ import requests
 import tempfile
 
 
-DEFAULT_URL = 'https://clickhouse-datasets.s3.amazonaws.com'
+DEFAULT_URL = "https://clickhouse-datasets.s3.amazonaws.com"
 
 AVAILABLE_DATASETS = {
-    'hits': 'hits_v1.tar',
-    'visits': 'visits_v1.tar',
+    "hits": "hits_v1.tar",
+    "visits": "visits_v1.tar",
 }
 
 RETRIES_COUNT = 5
 
 
 def _get_temp_file_name():
-    return os.path.join(tempfile._get_default_tempdir(), next(tempfile._get_candidate_names()))
+    return os.path.join(
+        tempfile._get_default_tempdir(), next(tempfile._get_candidate_names())
+    )
 
 
 def build_url(base_url, dataset):
-    return os.path.join(base_url, dataset, 'partitions', AVAILABLE_DATASETS[dataset])
+    return os.path.join(base_url, dataset, "partitions", AVAILABLE_DATASETS[dataset])
 
 
 def dowload_with_progress(url, path):
     logging.info("Downloading from %s to temp path %s", url, path)
     for i in range(RETRIES_COUNT):
         try:
-            with open(path, 'wb') as f:
+            with open(path, "wb") as f:
                 response = requests.get(url, stream=True)
                 response.raise_for_status()
-                total_length = response.headers.get('content-length')
+                total_length = response.headers.get("content-length")
                 if total_length is None or int(total_length) == 0:
-                    logging.info("No content-length, will download file without progress")
+                    logging.info(
+                        "No content-length, will download file without progress"
+                    )
                     f.write(response.content)
                 else:
                     dl = 0
@@ -46,7 +53,11 @@ def dowload_with_progress(url, path):
                         if sys.stdout.isatty():
                             done = int(50 * dl / total_length)
                             percent = int(100 * float(dl) / total_length)
-                            sys.stdout.write("\r[{}{}] {}%".format('=' * done, ' ' * (50-done), percent))
+                            sys.stdout.write(
+                                "\r[{}{}] {}%".format(
+                                    "=" * done, " " * (50 - done), percent
+                                )
+                            )
                             sys.stdout.flush()
             break
         except Exception as ex:
@@ -56,14 +67,21 @@ def dowload_with_progress(url, path):
             if os.path.exists(path):
                 os.remove(path)
     else:
-        raise Exception("Cannot download dataset from {}, all retries exceeded".format(url))
+        raise Exception(
+            "Cannot download dataset from {}, all retries exceeded".format(url)
+        )
 
     sys.stdout.write("\n")
     logging.info("Downloading finished")
 
 
 def unpack_to_clickhouse_directory(tar_path, clickhouse_path):
-    logging.info("Will unpack data from temp path %s to clickhouse db %s", tar_path, clickhouse_path)
-    with tarfile.open(tar_path, 'r') as comp_file:
+    logging.info(
+        "Will unpack data from temp path %s to clickhouse db %s",
+        tar_path,
+        clickhouse_path,
+    )
+    with tarfile.open(tar_path, "r") as comp_file:
         comp_file.extractall(path=clickhouse_path)
     logging.info("Unpack finished")
 
@@ -72,15 +90,21 @@ if __name__ == "__main__":
     logging.basicConfig(level=logging.INFO)
 
     parser = argparse.ArgumentParser(
-        description="Simple tool for dowloading datasets for clickhouse from S3")
+        description="Simple tool for dowloading datasets for clickhouse from S3"
+    )
 
-    parser.add_argument('--dataset-names', required=True, nargs='+', choices=list(AVAILABLE_DATASETS.keys()))
-    parser.add_argument('--url-prefix', default=DEFAULT_URL)
-    parser.add_argument('--clickhouse-data-path', default='/var/lib/clickhouse/')
+    parser.add_argument(
+        "--dataset-names",
+        required=True,
+        nargs="+",
+        choices=list(AVAILABLE_DATASETS.keys()),
+    )
+    parser.add_argument("--url-prefix", default=DEFAULT_URL)
+    parser.add_argument("--clickhouse-data-path", default="/var/lib/clickhouse/")
 
     args = parser.parse_args()
     datasets = args.dataset_names
-    logging.info("Will fetch following datasets: %s", ', '.join(datasets))
+    logging.info("Will fetch following datasets: %s", ", ".join(datasets))
     for dataset in datasets:
         logging.info("Processing %s", dataset)
         temp_archive_path = _get_temp_file_name()
@@ -92,10 +116,11 @@ if __name__ == "__main__":
             logging.info("Some exception occured %s", str(ex))
             raise
         finally:
-            logging.info("Will remove downloaded file %s from filesystem if it exists", temp_archive_path)
+            logging.info(
+                "Will remove downloaded file %s from filesystem if it exists",
+                temp_archive_path,
+            )
             if os.path.exists(temp_archive_path):
                 os.remove(temp_archive_path)
         logging.info("Processing of %s finished", dataset)
     logging.info("Fetch finished, enjoy your tables!")
 
@@ -77,7 +77,7 @@ def trim_for_log(s):
         return s
     lines = s.splitlines()
     if len(lines) > 10000:
         separator = "-" * 40 + str(len(lines) - 10000) + " lines are hidden" + "-" * 40
         return "\n".join(lines[:5000] + [] + [separator] + [] + lines[-5000:])
     else:
         return "\n".join(lines)
@@ -95,7 +95,13 @@ class HTTPError(Exception):
 
 # Helpers to execute queries via HTTP interface.
 def clickhouse_execute_http(
-    base_args, query, timeout=30, settings=None, default_format=None, max_http_retries=5, retry_error_codes=False
+    base_args,
+    query,
+    timeout=30,
+    settings=None,
+    default_format=None,
+    max_http_retries=5,
+    retry_error_codes=False,
 ):
     if args.secure:
         client = http.client.HTTPSConnection(
@@ -146,12 +152,36 @@ def clickhouse_execute_http(
 
     return data
 
-def clickhouse_execute(base_args, query, timeout=30, settings=None, max_http_retries=5, retry_error_codes=False):
-    return clickhouse_execute_http(base_args, query, timeout, settings, max_http_retries=max_http_retries, retry_error_codes=retry_error_codes).strip()
+
+def clickhouse_execute(
+    base_args,
+    query,
+    timeout=30,
+    settings=None,
+    max_http_retries=5,
+    retry_error_codes=False,
+):
+    return clickhouse_execute_http(
+        base_args,
+        query,
+        timeout,
+        settings,
+        max_http_retries=max_http_retries,
+        retry_error_codes=retry_error_codes,
+    ).strip()
 
 
-def clickhouse_execute_json(base_args, query, timeout=60, settings=None, max_http_retries=5):
-    data = clickhouse_execute_http(base_args, query, timeout, settings, "JSONEachRow", max_http_retries=max_http_retries)
+def clickhouse_execute_json(
+    base_args, query, timeout=60, settings=None, max_http_retries=5
+):
+    data = clickhouse_execute_http(
+        base_args,
+        query,
+        timeout,
+        settings,
+        "JSONEachRow",
+        max_http_retries=max_http_retries,
+    )
     if not data:
         return None
     rows = []
@@ -648,7 +678,9 @@ class TestCase:
 
             clickhouse_execute(
                 args,
-                "CREATE DATABASE IF NOT EXISTS " + database + get_db_engine(testcase_args, database),
+                "CREATE DATABASE IF NOT EXISTS "
+                + database
+                + get_db_engine(testcase_args, database),
                 settings=get_create_database_settings(args, testcase_args),
             )
 
@@ -831,7 +863,8 @@ class TestCase:
 
         # TODO: remove checking "no-upgrade-check" after 23.1
         elif args.upgrade_check and (
-            "no-upgrade-check" in tags or "no-upgrade-check" in tags):
+            "no-upgrade-check" in tags or "no-upgrade-check" in tags
+        ):
             return FailureReason.NO_UPGRADE_CHECK
 
         elif tags and ("no-s3-storage" in tags) and args.s3_storage:
@@ -1051,7 +1084,11 @@ class TestCase:
     @staticmethod
     def send_test_name_failed(suite: str, case: str):
         pid = os.getpid()
-        clickhouse_execute(args, f"SELECT 'Running test {suite}/{case} from pid={pid}'", retry_error_codes=True)
+        clickhouse_execute(
+            args,
+            f"SELECT 'Running test {suite}/{case} from pid={pid}'",
+            retry_error_codes=True,
+        )
 
     def run_single_test(
         self, server_logs_level, client_options
@@ -2220,6 +2257,7 @@ def find_binary(name):
 
     raise Exception(f"{name} was not found in PATH")
 
+
 def find_clickhouse_command(binary, command):
     symlink = binary + "-" + command
     if os.access(symlink, os.X_OK):
@@ -2228,6 +2266,7 @@ def find_clickhouse_command(binary, command):
     # To avoid requiring symlinks (in case you download binary from CI)
     return binary + " " + command
 
+
 def get_additional_client_options(args):
     if args.client_option:
         return " ".join("--" + option for option in args.client_option)
@@ -2569,7 +2608,9 @@ if __name__ == "__main__":
             "WARNING: --extract_from_config option is deprecated and will be removed the the future",
             file=sys.stderr,
         )
-        args.extract_from_config = find_clickhouse_command(args.binary, "extract-from-config")
+        args.extract_from_config = find_clickhouse_command(
+            args.binary, "extract-from-config"
+        )
 
     if args.configclient:
         args.client += " --config-file=" + args.configclient
 
@@ -243,11 +243,18 @@ if __name__ == "__main__":
     )
 
     parser.add_argument(
-        "--no-random", action="store", dest="no_random", help="Disable tests order randomization"
+        "--no-random",
+        action="store",
+        dest="no_random",
+        help="Disable tests order randomization",
    )
 
     parser.add_argument(
-        "--pre-pull", action="store_true", default=False, dest="pre_pull", help="Pull images for docker_compose before all other actions"
+        "--pre-pull",
+        action="store_true",
+        default=False,
+        dest="pre_pull",
+        help="Pull images for docker_compose before all other actions",
    )
 
     parser.add_argument(
@@ -306,7 +313,6 @@ if __name__ == "__main__":
     # if not args.no_random:
     #     rand_args += f"--random-seed={os.getpid()}"
 
-
     net = ""
     if args.network:
         net = "--net={}".format(args.network)
@@ -416,8 +422,11 @@ if __name__ == "__main__":
         name=CONTAINER_NAME,
     )
 
     cmd = cmd_base + " " + args.command
-    cmd_pre_pull = cmd_base + " find /compose -name docker_compose_*.yml -exec docker-compose -f '{}' pull \;"
+    cmd_pre_pull = (
+        cmd_base
+        + " find /compose -name docker_compose_*.yml -exec docker-compose -f '{}' pull \;"
+    )
 
     containers = subprocess.check_output(
         f"docker ps --all --quiet --filter name={CONTAINER_NAME} --format={{{{.ID}}}}",
 
@@ -1,57 +1,72 @@
 #!/usr/bin/env python3
 
 
 def gen_queries():
-    create_template = 'create table tab_00386 (a Int8, b String, c Tuple(Int8), d Tuple(Tuple(Int8)), e Tuple(Int8, String), f Tuple(Tuple(Int8, String))) engine = MergeTree order by ({}) partition by {}'
-    drop_query = 'drop table if exists tab_00386'
-    values = ('1', "'a'", 'tuple(1)', 'tuple(tuple(1))', "(1, 'a')", "tuple((1, 'a'))")
+    create_template = "create table tab_00386 (a Int8, b String, c Tuple(Int8), d Tuple(Tuple(Int8)), e Tuple(Int8, String), f Tuple(Tuple(Int8, String))) engine = MergeTree order by ({}) partition by {}"
+    drop_query = "drop table if exists tab_00386"
+    values = ("1", "'a'", "tuple(1)", "tuple(tuple(1))", "(1, 'a')", "tuple((1, 'a'))")
     insert_query = "insert into tab_00386 values (1, 'a', tuple(1), tuple(tuple(1)), (1, 'a'), tuple((1, 'a')))"
-    columns = tuple('a b c d'.split())
-    order_by_columns = tuple('a b c'.split())
-    partition_by_columns = tuple(' tuple() a'.split())
+    columns = tuple("a b c d".split())
+    order_by_columns = tuple("a b c".split())
+    partition_by_columns = tuple(" tuple() a".split())
 
     for partition in partition_by_columns:
         for key_mask in range(1, 1 << len(order_by_columns)):
-            key = ','.join(order_by_columns[i] for i in range(len(order_by_columns)) if (1 << i) & key_mask != 0)
+            key = ",".join(
+                order_by_columns[i]
+                for i in range(len(order_by_columns))
+                if (1 << i) & key_mask != 0
+            )
             create_query = create_template.format(key, partition)
             for q in (drop_query, create_query, insert_query):
                 yield q
 
             for column, value in zip(columns, values):
-                yield 'select {} in {} from tab_00386'.format(column, value)
-                yield 'select {} in tuple({}) from tab_00386'.format(column, value)
-                yield 'select {} in (select {} from tab_00386) from tab_00386'.format(column, column)
+                yield "select {} in {} from tab_00386".format(column, value)
+                yield "select {} in tuple({}) from tab_00386".format(column, value)
+                yield "select {} in (select {} from tab_00386) from tab_00386".format(
+                    column, column
+                )
 
             for i in range(len(columns)):
                 for j in range(i, len(columns)):
-                    yield 'select ({}, {}) in tuple({}, {}) from tab_00386'.format(columns[i], columns[j], values[i], values[j])
-                    yield 'select ({}, {}) in (select {}, {} from tab_00386) from tab_00386'.format(columns[i], columns[j], columns[i], columns[j])
-                    yield 'select ({}, {}) in (select ({}, {}) from tab_00386) from tab_00386'.format(columns[i], columns[j], columns[i], columns[j])
+                    yield "select ({}, {}) in tuple({}, {}) from tab_00386".format(
+                        columns[i], columns[j], values[i], values[j]
+                    )
+                    yield "select ({}, {}) in (select {}, {} from tab_00386) from tab_00386".format(
+                        columns[i], columns[j], columns[i], columns[j]
+                    )
+                    yield "select ({}, {}) in (select ({}, {}) from tab_00386) from tab_00386".format(
+                        columns[i], columns[j], columns[i], columns[j]
+                    )
 
             yield "select e in (1, 'a') from tab_00386"
             yield "select f in tuple((1, 'a')) from tab_00386"
             yield "select f in tuple(tuple((1, 'a'))) from tab_00386"
 
-            yield 'select e in (select a, b from tab_00386) from tab_00386'
-            yield 'select e in (select (a, b) from tab_00386) from tab_00386'
-            yield 'select f in (select tuple((a, b)) from tab_00386) from tab_00386'
-            yield 'select tuple(f) in (select tuple(tuple((a, b))) from tab_00386) from tab_00386'
+            yield "select e in (select a, b from tab_00386) from tab_00386"
+            yield "select e in (select (a, b) from tab_00386) from tab_00386"
+            yield "select f in (select tuple((a, b)) from tab_00386) from tab_00386"
+            yield "select tuple(f) in (select tuple(tuple((a, b))) from tab_00386) from tab_00386"
 
 
 import requests
 import os
 
 
 def main():
-    url = os.environ['CLICKHOUSE_URL']
+    url = os.environ["CLICKHOUSE_URL"]
 
     for q in gen_queries():
         resp = requests.post(url, data=q)
-        if resp.status_code != 200 or resp.text.strip() not in ('1', ''):
-            print('Query:', q)
-            print('Code:', resp.status_code)
+        if resp.status_code != 200 or resp.text.strip() not in ("1", ""):
+            print("Query:", q)
+            print("Code:", resp.status_code)
             print(resp.text)
             break
 
-    requests.post(url, data='drop table tab_00386')
+    requests.post(url, data="drop table tab_00386")
 
 
 if __name__ == "__main__":
     main()
 
@@ -2,8 +2,20 @@
 
 import os, itertools, urllib.request, urllib.parse, urllib.error, urllib.request, urllib.error, urllib.parse, sys
 
 
 def get_ch_answer(query):
-    return urllib.request.urlopen(os.environ.get('CLICKHOUSE_URL', 'http://localhost:' + os.environ.get('CLICKHOUSE_PORT_HTTP', '8123') ), data=query.encode()).read().decode()
+    return (
+        urllib.request.urlopen(
+            os.environ.get(
+                "CLICKHOUSE_URL",
+                "http://localhost:" + os.environ.get("CLICKHOUSE_PORT_HTTP", "8123"),
+            ),
+            data=query.encode(),
+        )
+        .read()
+        .decode()
+    )
 
 
 def check_answers(query, answer):
     ch_answer = get_ch_answer(query)
@@ -13,36 +25,34 @@ def check_answers(query, answer):
         print("Fetched answer :", ch_answer)
         exit(-1)
 
 
 def get_values():
     values = [0, 1, -1]
     for bits in [8, 16, 32, 64]:
         values += [2**bits, 2**bits - 1]
-        values += [2**(bits-1) - 1, 2**(bits-1), 2**(bits-1) + 1]
-        values += [-2**(bits-1) - 1, -2**(bits-1), -2**(bits-1) + 1]
+        values += [2 ** (bits - 1) - 1, 2 ** (bits - 1), 2 ** (bits - 1) + 1]
+        values += [-(2 ** (bits - 1)) - 1, -(2 ** (bits - 1)), -(2 ** (bits - 1)) + 1]
     return values
 
 
 def is_valid_integer(x):
-    return -2**63 <= x and x <= 2**64-1
+    return -(2**63) <= x and x <= 2**64 - 1
 
 
-TEST_WITH_CASTING=True
-GENERATE_TEST_FILES=False
+TEST_WITH_CASTING = True
+GENERATE_TEST_FILES = False
 
 TYPES = {
-    "UInt8" : { "bits" : 8, "sign" : False, "float" : False },
-    "Int8" : { "bits" : 8, "sign" : True, "float" : False },
-
-    "UInt16": { "bits" : 16, "sign" : False, "float" : False },
-    "Int16" : { "bits" : 16, "sign" : True, "float" : False },
-
-    "UInt32": { "bits" : 32, "sign" : False, "float" : False },
-    "Int32" : { "bits" : 32, "sign" : True, "float" : False },
-
-    "UInt64": { "bits" : 64, "sign" : False, "float" : False },
-    "Int64" : { "bits" : 64, "sign" : True, "float" : False }
-
-    #"Float32" : { "bits" : 32, "sign" : True, "float" : True },
-    #"Float64" : { "bits" : 64, "sign" : True, "float" : True }
+    "UInt8": {"bits": 8, "sign": False, "float": False},
+    "Int8": {"bits": 8, "sign": True, "float": False},
+    "UInt16": {"bits": 16, "sign": False, "float": False},
+    "Int16": {"bits": 16, "sign": True, "float": False},
+    "UInt32": {"bits": 32, "sign": False, "float": False},
+    "Int32": {"bits": 32, "sign": True, "float": False},
+    "UInt64": {"bits": 64, "sign": False, "float": False},
+    "Int64": {"bits": 64, "sign": True, "float": False}
+    # "Float32" : { "bits" : 32, "sign" : True, "float" : True },
+    # "Float64" : { "bits" : 64, "sign" : True, "float" : True }
 }
 
 
@@ -55,14 +65,18 @@ def inside_range(value, type_name):
         return True
 
     if signed:
-        return -2**(bits-1) <= value and value <= 2**(bits-1) - 1
+        return -(2 ** (bits - 1)) <= value and value <= 2 ** (bits - 1) - 1
     else:
         return 0 <= value and value <= 2**bits - 1
 
 
 def test_operators(v1, v2, v1_passed, v2_passed):
-    query_str = "{v1} = {v2}, {v1} != {v2}, {v1} < {v2}, {v1} <= {v2}, {v1} > {v2}, {v1} >= {v2},\t".format(v1=v1_passed, v2=v2_passed)
-    query_str += "{v1} = {v2}, {v1} != {v2}, {v1} < {v2}, {v1} <= {v2}, {v1} > {v2}, {v1} >= {v2} ".format(v1=v2_passed, v2=v1_passed)
+    query_str = "{v1} = {v2}, {v1} != {v2}, {v1} < {v2}, {v1} <= {v2}, {v1} > {v2}, {v1} >= {v2},\t".format(
+        v1=v1_passed, v2=v2_passed
+    )
+    query_str += "{v1} = {v2}, {v1} != {v2}, {v1} < {v2}, {v1} <= {v2}, {v1} > {v2}, {v1} >= {v2} ".format(
+        v1=v2_passed, v2=v1_passed
+    )
 
     answers = [v1 == v2, v1 != v2, v1 < v2, v1 <= v2, v1 > v2, v1 >= v2]
     answers += [v2 == v1, v2 != v1, v2 < v1, v2 <= v1, v2 > v1, v2 >= v1]
@@ -74,6 +88,7 @@ def test_operators(v1, v2, v1_passed, v2_passed):
 
 VALUES = [x for x in get_values() if is_valid_integer(x)]
 
+
 def test_pair(v1, v2):
     query = "SELECT {}, {}, ".format(v1, v2)
     answers = "{}\t{}\t".format(v1, v2)
@@ -87,19 +102,58 @@ def test_pair(v1, v2):
         if inside_range(v1, t1):
             for t2 in TYPES.keys():
                 if inside_range(v2, t2):
-                    q, a = test_operators(v1, v2, 'to{}({})'.format(t1, v1), 'to{}({})'.format(t2, v2))
-                    query += ', ' + q
+                    q, a = test_operators(
+                        v1, v2, "to{}({})".format(t1, v1), "to{}({})".format(t2, v2)
+                    )
+                    query += ", " + q
                     answers += "\t" + a
 
     check_answers(query, answers)
     return query, answers
 
 
-VALUES_INT = [0, -1, 1, 2**64-1, 2**63, -2**63, 2**63-1, 2**51, 2**52, 2**53-1, 2**53, 2**53+1, 2**53+2, -2**53+1, -2**53, -2**53-1, -2**53-2, 2*52, -2**52]
-VALUES_FLOAT = [float(x) for x in VALUES_INT + [-0.5, 0.5, -1.5, 1.5, 2**53, 2**51 - 0.5, 2**51 + 0.5, 2**60, -2**60, -2**63 - 10000, 2**63 + 10000]]
+VALUES_INT = [
+    0,
+    -1,
+    1,
+    2**64 - 1,
+    2**63,
+    -(2**63),
+    2**63 - 1,
+    2**51,
+    2**52,
+    2**53 - 1,
+    2**53,
+    2**53 + 1,
+    2**53 + 2,
+    -(2**53) + 1,
+    -(2**53),
+    -(2**53) - 1,
+    -(2**53) - 2,
+    2 * 52,
+    -(2**52),
+]
+VALUES_FLOAT = [
+    float(x)
+    for x in VALUES_INT
+    + [
+        -0.5,
+        0.5,
+        -1.5,
+        1.5,
+        2**53,
+        2**51 - 0.5,
+        2**51 + 0.5,
+        2**60,
+        -(2**60),
+        -(2**63) - 10000,
+        2**63 + 10000,
+    ]
+]
 
 
 def test_float_pair(i, f):
-    f_str = ("%.9f" % f)
+    f_str = "%.9f" % f
     query = "SELECT '{}', '{}', ".format(i, f_str)
     answers = "{}\t{}\t".format(i, f_str)
 
@@ -110,8 +164,8 @@ def test_float_pair(i, f):
     if TEST_WITH_CASTING:
         for t1 in TYPES.keys():
             if inside_range(i, t1):
-                q, a = test_operators(i, f, 'to{}({})'.format(t1, i), f_str)
-                query += ', ' + q
+                q, a = test_operators(i, f, "to{}({})".format(t1, i), f_str)
+                query += ", " + q
                 answers += "\t" + a
 
     check_answers(query, answers)
@@ -120,22 +174,26 @@ def test_float_pair(i, f):
 
 def main():
     if GENERATE_TEST_FILES:
-        base_name = '00411_accurate_number_comparison'
-        sql_file = open(base_name + '.sql', 'wt')
-        ref_file = open(base_name + '.reference', 'wt')
+        base_name = "00411_accurate_number_comparison"
+        sql_file = open(base_name + ".sql", "wt")
+        ref_file = open(base_name + ".reference", "wt")
 
     num_int_tests = len(list(itertools.combinations(VALUES, 2)))
 
     num_parts = 4
     for part in range(0, num_parts):
-        if 'int' + str(part + 1) in sys.argv[1:]:
-            for (v1, v2) in itertools.islice(itertools.combinations(VALUES, 2), part * num_int_tests // num_parts, (part + 1) * num_int_tests // num_parts):
+        if "int" + str(part + 1) in sys.argv[1:]:
+            for (v1, v2) in itertools.islice(
+                itertools.combinations(VALUES, 2),
+                part * num_int_tests // num_parts,
+                (part + 1) * num_int_tests // num_parts,
+            ):
                 q, a = test_pair(v1, v2)
                 if GENERATE_TEST_FILES:
                     sql_file.write(q + ";\n")
                     ref_file.write(a + "\n")
 
-    if 'float' in sys.argv[1:]:
+    if "float" in sys.argv[1:]:
         for (i, f) in itertools.product(VALUES_INT, VALUES_FLOAT):
             q, a = test_float_pair(i, f)
             if GENERATE_TEST_FILES:
 
@@ -12,6 +12,7 @@ import subprocess
 from io import StringIO
 from http.server import BaseHTTPRequestHandler, HTTPServer
 
+
 def is_ipv6(host):
     try:
         socket.inet_aton(host)
@@ -19,6 +20,7 @@ def is_ipv6(host):
     except:
         return True
 
+
 def get_local_port(host, ipv6):
     if ipv6:
         family = socket.AF_INET6
@@ -29,8 +31,9 @@ def get_local_port(host, ipv6):
         fd.bind((host, 0))
         return fd.getsockname()[1]
 
-CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', '127.0.0.1')
-CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123')
+
+CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "127.0.0.1")
+CLICKHOUSE_PORT_HTTP = os.environ.get("CLICKHOUSE_PORT_HTTP", "8123")
 
 #####################################################################################
 # This test starts an HTTP server and serves data to clickhouse url-engine based table.
@@ -39,27 +42,42 @@ CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123')
 #####################################################################################
 
 # IP-address of this host accessible from the outside world. Get the first one
-HTTP_SERVER_HOST = subprocess.check_output(['hostname', '-i']).decode('utf-8').strip().split()[0]
+HTTP_SERVER_HOST = (
+    subprocess.check_output(["hostname", "-i"]).decode("utf-8").strip().split()[0]
+)
 IS_IPV6 = is_ipv6(HTTP_SERVER_HOST)
 HTTP_SERVER_PORT = get_local_port(HTTP_SERVER_HOST, IS_IPV6)
 
 # IP address and port of the HTTP server started from this script.
 HTTP_SERVER_ADDRESS = (HTTP_SERVER_HOST, HTTP_SERVER_PORT)
 if IS_IPV6:
-    HTTP_SERVER_URL_STR = 'http://' + f'[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}' + "/"
+    HTTP_SERVER_URL_STR = (
+        "http://"
+        + f"[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}"
+        + "/"
+    )
 else:
-    HTTP_SERVER_URL_STR = 'http://' + f'{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}' + "/"
+    HTTP_SERVER_URL_STR = (
+        "http://" + f"{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}" + "/"
+    )
 
-CSV_DATA = os.path.join(tempfile._get_default_tempdir(), next(tempfile._get_candidate_names()))
+CSV_DATA = os.path.join(
+    tempfile._get_default_tempdir(), next(tempfile._get_candidate_names())
+)
 
 
 def get_ch_answer(query):
     host = CLICKHOUSE_HOST
     if IS_IPV6:
-        host = f'[{host}]'
+        host = f"[{host}]"
 
-    url = os.environ.get('CLICKHOUSE_URL', 'http://{host}:{port}'.format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP))
+    url = os.environ.get(
+        "CLICKHOUSE_URL",
+        "http://{host}:{port}".format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP),
+    )
     return urllib.request.urlopen(url, data=query.encode()).read().decode()
 
 
 def check_answers(query, answer):
     ch_answer = get_ch_answer(query)
     if ch_answer.strip() != answer.strip():
@@ -68,18 +86,19 @@ def check_answers(query, answer):
         print("Fetched answer :", ch_answer, file=sys.stderr)
         raise Exception("Fail on query")
 
 
 class CSVHTTPServer(BaseHTTPRequestHandler):
     def _set_headers(self):
         self.send_response(200)
-        self.send_header('Content-type', 'text/csv')
+        self.send_header("Content-type", "text/csv")
         self.end_headers()
 
     def do_GET(self):
         self._set_headers()
-        with open(CSV_DATA, 'r') as fl:
-            reader = csv.reader(fl, delimiter=',')
+        with open(CSV_DATA, "r") as fl:
+            reader = csv.reader(fl, delimiter=",")
             for row in reader:
-                self.wfile.write((', '.join(row) + '\n').encode())
+                self.wfile.write((", ".join(row) + "\n").encode())
         return
 
     def do_HEAD(self):
@@ -87,33 +106,33 @@ class CSVHTTPServer(BaseHTTPRequestHandler):
         return
 
     def read_chunk(self):
-        msg = ''
+        msg = ""
         while True:
             sym = self.rfile.read(1)
-            if sym == '':
+            if sym == "":
                 break
-            msg += sym.decode('utf-8')
-            if msg.endswith('\r\n'):
+            msg += sym.decode("utf-8")
+            if msg.endswith("\r\n"):
                 break
         length = int(msg[:-2], 16)
         if length == 0:
-            return ''
+            return ""
         content = self.rfile.read(length)
         self.rfile.read(2)  # read sep \r\n
-        return content.decode('utf-8')
+        return content.decode("utf-8")
 
     def do_POST(self):
-        data = ''
+        data = ""
         while True:
             chunk = self.read_chunk()
             if not chunk:
                 break
             data += chunk
         with StringIO(data) as fl:
-            reader = csv.reader(fl, delimiter=',')
-            with open(CSV_DATA, 'a') as d:
+            reader = csv.reader(fl, delimiter=",")
+            with open(CSV_DATA, "a") as d:
                 for row in reader:
-                    d.write(','.join(row) + '\n')
+                    d.write(",".join(row) + "\n")
         self._set_headers()
         self.wfile.write(b"ok")
 
@@ -124,6 +143,7 @@ class CSVHTTPServer(BaseHTTPRequestHandler):
 class HTTPServerV6(HTTPServer):
     address_family = socket.AF_INET6
 
+
 def start_server():
     if IS_IPV6:
         httpd = HTTPServerV6(HTTP_SERVER_ADDRESS, CSVHTTPServer)
@@ -133,49 +153,76 @@ def start_server():
     t = threading.Thread(target=httpd.serve_forever)
     return t, httpd
 
 
 # test section
 
-def test_select(table_name="", schema="str String,numuint UInt32,numint Int32,double Float64", requests=[], answers=[], test_data=""):
-    with open(CSV_DATA, 'w') as f: # clear file
-        f.write('')
+
+def test_select(
+    table_name="",
+    schema="str String,numuint UInt32,numint Int32,double Float64",
+    requests=[],
+    answers=[],
+    test_data="",
+):
+    with open(CSV_DATA, "w") as f:  # clear file
+        f.write("")
 
     if test_data:
-        with open(CSV_DATA, 'w') as f:
+        with open(CSV_DATA, "w") as f:
             f.write(test_data + "\n")
 
     if table_name:
         get_ch_answer("drop table if exists {}".format(table_name))
-        get_ch_answer("create table {} ({}) engine=URL('{}', 'CSV')".format(table_name, schema, HTTP_SERVER_URL_STR))
+        get_ch_answer(
+            "create table {} ({}) engine=URL('{}', 'CSV')".format(
+                table_name, schema, HTTP_SERVER_URL_STR
+            )
+        )
 
     for i in range(len(requests)):
         tbl = table_name
         if not tbl:
-            tbl = "url('{addr}', 'CSV', '{schema}')".format(addr=HTTP_SERVER_URL_STR, schema=schema)
+            tbl = "url('{addr}', 'CSV', '{schema}')".format(
+                addr=HTTP_SERVER_URL_STR, schema=schema
+            )
         check_answers(requests[i].format(tbl=tbl), answers[i])
 
     if table_name:
         get_ch_answer("drop table if exists {}".format(table_name))
 
 
-def test_insert(table_name="", schema="str String,numuint UInt32,numint Int32,double Float64", requests_insert=[], requests_select=[], answers=[]):
-    with open(CSV_DATA, 'w') as f: # flush test file
-        f.write('')
+def test_insert(
+    table_name="",
+    schema="str String,numuint UInt32,numint Int32,double Float64",
+    requests_insert=[],
+    requests_select=[],
+    answers=[],
+):
+    with open(CSV_DATA, "w") as f:  # flush test file
+        f.write("")
 
     if table_name:
         get_ch_answer("drop table if exists {}".format(table_name))
-        get_ch_answer("create table {} ({}) engine=URL('{}', 'CSV')".format(table_name, schema, HTTP_SERVER_URL_STR))
+        get_ch_answer(
+            "create table {} ({}) engine=URL('{}', 'CSV')".format(
+                table_name, schema, HTTP_SERVER_URL_STR
+            )
+        )
 
     for req in requests_insert:
         tbl = table_name
         if not tbl:
-            tbl = "table function url('{addr}', 'CSV', '{schema}')".format(addr=HTTP_SERVER_URL_STR, schema=schema)
+            tbl = "table function url('{addr}', 'CSV', '{schema}')".format(
+                addr=HTTP_SERVER_URL_STR, schema=schema
+            )
         get_ch_answer(req.format(tbl=tbl))
 
 
     for i in range(len(requests_select)):
         tbl = table_name
         if not tbl:
-            tbl = "url('{addr}', 'CSV', '{schema}')".format(addr=HTTP_SERVER_URL_STR, schema=schema)
+            tbl = "url('{addr}', 'CSV', '{schema}')".format(
+                addr=HTTP_SERVER_URL_STR, schema=schema
+            )
         check_answers(requests_select[i].format(tbl=tbl), answers[i])
 
     if table_name:
@@ -185,9 +232,11 @@ def test_insert(table_name="", schema="str String,numuint UInt32,numint Int32,do
 def main():
     test_data = "Hello,2,-2,7.7\nWorld,2,-5,8.8"
     select_only_requests = {
-        "select str,numuint,numint,double from {tbl}" : test_data.replace(',', '\t'),
-        "select numuint, count(*) from {tbl} group by numuint" : "2\t2",
-        "select str,numuint,numint,double from {tbl} limit 1": test_data.split("\n")[0].replace(',', '\t'),
+        "select str,numuint,numint,double from {tbl}": test_data.replace(",", "\t"),
+        "select numuint, count(*) from {tbl} group by numuint": "2\t2",
+        "select str,numuint,numint,double from {tbl} limit 1": test_data.split("\n")[
+            0
+        ].replace(",", "\t"),
     }
 
     insert_requests = [
@@ -196,21 +245,41 @@ def main():
     ]
 
     select_requests = {
-        "select distinct numuint from {tbl} order by numuint": '\n'.join([str(i) for i in range(11)]),
-        "select count(*) from {tbl}": '12',
-        'select double, count(*) from {tbl} group by double order by double': "7.7\t2\n9.9\t10"
+        "select distinct numuint from {tbl} order by numuint": "\n".join(
+            [str(i) for i in range(11)]
+        ),
+        "select count(*) from {tbl}": "12",
+        "select double, count(*) from {tbl} group by double order by double": "7.7\t2\n9.9\t10",
    }
 
     t, httpd = start_server()
     t.start()
     # test table with url engine
-    test_select(table_name="test_table_select", requests=list(select_only_requests.keys()), answers=list(select_only_requests.values()), test_data=test_data)
+    test_select(
+        table_name="test_table_select",
+        requests=list(select_only_requests.keys()),
+        answers=list(select_only_requests.values()),
+        test_data=test_data,
+    )
     # test table function url
-    test_select(requests=list(select_only_requests.keys()), answers=list(select_only_requests.values()), test_data=test_data)
-    #test insert into table with url engine
-    test_insert(table_name="test_table_insert", requests_insert=insert_requests, requests_select=list(select_requests.keys()), answers=list(select_requests.values()))
-    #test insert into table function url
-    test_insert(requests_insert=insert_requests, requests_select=list(select_requests.keys()), answers=list(select_requests.values()))
+    test_select(
+        requests=list(select_only_requests.keys()),
+        answers=list(select_only_requests.values()),
+        test_data=test_data,
+    )
+    # test insert into table with url engine
+    test_insert(
+        table_name="test_table_insert",
+        requests_insert=insert_requests,
+        requests_select=list(select_requests.keys()),
+        answers=list(select_requests.values()),
+    )
+    # test insert into table function url
+    test_insert(
+        requests_insert=insert_requests,
+        requests_select=list(select_requests.keys()),
+        answers=list(select_requests.values()),
+    )
 
     httpd.shutdown()
     t.join()
 
@@ -12,35 +12,46 @@ HAYSTACKS = [
 
 NEEDLE = "needle"
 
-HAY_RE = re.compile(r'\bhay\b', re.IGNORECASE)
-NEEDLE_RE = re.compile(r'\bneedle\b', re.IGNORECASE)
+HAY_RE = re.compile(r"\bhay\b", re.IGNORECASE)
+NEEDLE_RE = re.compile(r"\bneedle\b", re.IGNORECASE)
 
 
 def replace_follow_case(replacement):
     def func(match):
         g = match.group()
-        if g.islower(): return replacement.lower()
-        if g.istitle(): return replacement.title()
-        if g.isupper(): return replacement.upper()
+        if g.islower():
+            return replacement.lower()
+        if g.istitle():
+            return replacement.title()
+        if g.isupper():
+            return replacement.upper()
         return replacement
 
     return func
 
 
 def replace_separators(query, new_sep):
-    SEP_RE = re.compile('\\s+')
+    SEP_RE = re.compile("\\s+")
     result = SEP_RE.sub(new_sep, query)
     return result
 
-def enlarge_haystack(query, times, separator=''):
-    return HAY_RE.sub(replace_follow_case(('hay' + separator) * times), query)
+
+def enlarge_haystack(query, times, separator=""):
+    return HAY_RE.sub(replace_follow_case(("hay" + separator) * times), query)
 
 
 def small_needle(query):
-    return NEEDLE_RE.sub(replace_follow_case('n'), query)
+    return NEEDLE_RE.sub(replace_follow_case("n"), query)
 
 
 def remove_needle(query):
-    return NEEDLE_RE.sub('', query)
+    return NEEDLE_RE.sub("", query)
 
 
 def replace_needle(query, new_needle):
     return NEEDLE_RE.sub(new_needle, query)
 
 
 # with str.lower, str.uppert, str.title and such
 def transform_needle(query, string_transformation_func):
     def replace_with_transformation(match):
@@ -49,19 +60,21 @@ def transform_needle(query, string_transformation_func):
 
     return NEEDLE_RE.sub(replace_with_transformation, query)
 
-def create_cases(case_sensitive_func, case_insensitive_func, table_row_template, table_query_template, const_query_template):
+
+def create_cases(
+    case_sensitive_func,
+    case_insensitive_func,
+    table_row_template,
+    table_query_template,
+    const_query_template,
+):
     const_queries = []
     table_rows = []
     table_queries = set()
 
     def add_case(func, haystack, needle, match):
         match = int(match)
-        args = dict(
-            func = func,
-            haystack = haystack,
-            needle = needle,
-            match = match
-        )
+        args = dict(func=func, haystack=haystack, needle=needle, match=match)
         const_queries.append(const_query_template.substitute(args))
         table_queries.add(table_query_template.substitute(args))
         table_rows.append(table_row_template.substitute(args))
@@ -69,14 +82,28 @@ def create_cases(case_sensitive_func, case_insensitive_func, table_row_template,
     def add_case_sensitive(haystack, needle, match):
         add_case(case_sensitive_func, haystack, needle, match)
         if match:
-            add_case(case_sensitive_func, transform_needle(haystack, str.swapcase), transform_needle(needle, str.swapcase), match)
+            add_case(
+                case_sensitive_func,
+                transform_needle(haystack, str.swapcase),
+                transform_needle(needle, str.swapcase),
+                match,
+            )
 
     def add_case_insensitive(haystack, needle, match):
         add_case(case_insensitive_func, haystack, needle, match)
         if match:
-            add_case(case_insensitive_func, transform_needle(haystack, str.swapcase), needle, match)
-            add_case(case_insensitive_func, haystack, transform_needle(needle, str.swapcase), match)
+            add_case(
+                case_insensitive_func,
+                transform_needle(haystack, str.swapcase),
+                needle,
+                match,
+            )
+            add_case(
+                case_insensitive_func,
+                haystack,
+                transform_needle(needle, str.swapcase),
+                match,
+            )
 
     # Negative cases
     add_case_sensitive(remove_needle(HAYSTACKS[0]), NEEDLE, False)
@@ -85,7 +112,7 @@ def create_cases(case_sensitive_func, case_insensitive_func, table_row_template,
     for haystack in HAYSTACKS:
         add_case_sensitive(transform_needle(haystack, str.swapcase), NEEDLE, False)
 
-        sep = ''
+        sep = ""
         h = replace_separators(haystack, sep)
 
         add_case_sensitive(h, NEEDLE, False)
@@ -102,8 +129,7 @@ def create_cases(case_sensitive_func, case_insensitive_func, table_row_template,
         add_case_sensitive(haystack, NEEDLE, True)
         add_case_insensitive(haystack, NEEDLE, True)
 
-
-        for sep in list(''' ,'''):
+        for sep in list(""" ,"""):
             h = replace_separators(haystack, sep)
             add_case_sensitive(h, NEEDLE, True)
             add_case_sensitive(small_needle(h), small_needle(NEEDLE), True)
@@ -114,32 +140,43 @@ def create_cases(case_sensitive_func, case_insensitive_func, table_row_template,
             add_case_insensitive(enlarge_haystack(h, 200, sep), NEEDLE, True)
 
             # case insesitivity works only on ASCII strings
-            add_case_sensitive(replace_needle(h, 'иголка'), replace_needle(NEEDLE, 'иголка'), True)
-            add_case_sensitive(replace_needle(h, '指针'), replace_needle(NEEDLE, '指针'), True)
+            add_case_sensitive(
+                replace_needle(h, "иголка"), replace_needle(NEEDLE, "иголка"), True
+            )
+            add_case_sensitive(
+                replace_needle(h, "指针"), replace_needle(NEEDLE, "指针"), True
+            )
 
-    for sep in list('''~!@$%^&*()-=+|]}[{";:/?.><\t''') + [r'\\\\']:
+    for sep in list("""~!@$%^&*()-=+|]}[{";:/?.><\t""") + [r"\\\\"]:
         h = replace_separators(HAYSTACKS[0], sep)
         add_case(case_sensitive_func, h, NEEDLE, True)
 
     return table_rows, table_queries, const_queries
 
+
 def main():
     def query(x):
         print(x)
 
-    CONST_QUERY = Template("""SELECT ${func}('${haystack}', '${needle}'), ' expecting ', ${match};""")
-    TABLE_QUERY = Template("""WITH '${needle}' as n
+    CONST_QUERY = Template(
+        """SELECT ${func}('${haystack}', '${needle}'), ' expecting ', ${match};"""
+    )
+    TABLE_QUERY = Template(
+        """WITH '${needle}' as n
 SELECT haystack, needle, ${func}(haystack, n) as result
 FROM ht
-WHERE func = '${func}' AND needle = n AND result != match;""")
+WHERE func = '${func}' AND needle = n AND result != match;"""
+    )
     TABLE_ROW = Template("""('${haystack}', '${needle}', ${match}, '${func}')""")
 
-    rows, table_queries, const_queries = create_cases('hasToken', 'hasTokenCaseInsensitive', TABLE_ROW, TABLE_QUERY, CONST_QUERY)
+    rows, table_queries, const_queries = create_cases(
+        "hasToken", "hasTokenCaseInsensitive", TABLE_ROW, TABLE_QUERY, CONST_QUERY
+    )
     for q in const_queries:
         query(q)
 
-    query("""DROP TABLE IF EXISTS ht;
+    query(
+        """DROP TABLE IF EXISTS ht;
 CREATE TABLE IF NOT EXISTS
 ht
 (
@@ -150,11 +187,15 @@ def main():
 )
 ENGINE MergeTree()
 ORDER BY haystack;
-INSERT INTO ht VALUES {values};""".format(values=", ".join(rows)))
+INSERT INTO ht VALUES {values};""".format(
+            values=", ".join(rows)
+        )
+    )
     for q in sorted(table_queries):
         query(q)
 
     query("""DROP TABLE ht""")
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     main()
 
@@ -8,28 +8,32 @@ import sys
import signal


CLICKHOUSE_CLIENT = os.environ.get("CLICKHOUSE_CLIENT")
CLICKHOUSE_CURL = os.environ.get("CLICKHOUSE_CURL")
CLICKHOUSE_URL = os.environ.get("CLICKHOUSE_URL")


def send_query(query):
    cmd = list(CLICKHOUSE_CLIENT.split())
    cmd += ["--query", query]
    # print(cmd)
    return subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
    ).stdout


def send_query_in_process_group(query):
    cmd = list(CLICKHOUSE_CLIENT.split())
    cmd += ["--query", query]
    # print(cmd)
    return subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, preexec_fn=os.setsid
    )


def read_lines_and_push_to_queue(pipe, queue):
    try:
        for line in iter(pipe.readline, ""):
            line = line.strip()
            print(line)
            sys.stdout.flush()
@@ -41,41 +45,44 @@ def read_lines_and_push_to_queue(pipe, queue):


def test():
    send_query("DROP TABLE IF EXISTS test.lv").read()
    send_query("DROP TABLE IF EXISTS test.mt").read()
    send_query(
        "CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()"
    ).read()
    send_query("CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt").read()

    q = queue.Queue()
    p = send_query_in_process_group("WATCH test.lv")
    thread = threading.Thread(target=read_lines_and_push_to_queue, args=(p.stdout, q))
    thread.start()

    line = q.get()
    print(line)
    assert line == "0\t1"

    send_query("INSERT INTO test.mt VALUES (1),(2),(3)").read()
    line = q.get()
    print(line)
    assert line == "6\t2"

    send_query("INSERT INTO test.mt VALUES (4),(5),(6)").read()
    line = q.get()
    print(line)
    assert line == "21\t3"

    # Send Ctrl+C to client.
    os.killpg(os.getpgid(p.pid), signal.SIGINT)
    # This insert shouldn't affect lv.
    send_query("INSERT INTO test.mt VALUES (7),(8),(9)").read()
    line = q.get()
    print(line)
    assert line is None

    send_query("DROP TABLE if exists test.lv").read()
    send_query("DROP TABLE if exists test.lv").read()

    thread.join()


test()
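
A note on the process-group trick used above: preexec_fn=os.setsid detaches the WATCH client into its own process group, so the simulated Ctrl+C reaches only the client and not the test runner. A minimal sketch of the same pattern (POSIX only; sleep stands in for clickhouse-client):

import os
import signal
import subprocess

# Start the child in its own process group (os.setsid), like the WATCH client above.
p = subprocess.Popen(["sleep", "60"], preexec_fn=os.setsid)

# Deliver SIGINT to the child's whole group; the parent is unaffected.
os.killpg(os.getpgid(p.pid), signal.SIGINT)
p.wait()
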
@@ -7,26 +7,30 @@ import os
import sys


CLICKHOUSE_CLIENT = os.environ.get("CLICKHOUSE_CLIENT")
CLICKHOUSE_CURL = os.environ.get("CLICKHOUSE_CURL")
CLICKHOUSE_URL = os.environ.get("CLICKHOUSE_URL")


def send_query(query):
    cmd = list(CLICKHOUSE_CLIENT.split())
    cmd += ["--query", query]
    # print(cmd)
    return subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
    ).stdout


def send_http_query(query):
    cmd = list(CLICKHOUSE_CURL.split())  # list(['curl', '-sSN', '--max-time', '10'])
    cmd += ["-sSN", CLICKHOUSE_URL, "-d", query]
    return subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
    ).stdout


def read_lines_and_push_to_queue(pipe, queue):
    for line in iter(pipe.readline, ""):
        line = line.strip()
        print(line)
        sys.stdout.flush()
@@ -36,28 +40,31 @@ def read_lines_and_push_to_queue(pipe, queue):


def test():
    send_query("DROP TABLE IF EXISTS test.lv").read()
    send_query("DROP TABLE IF EXISTS test.mt").read()
    send_query(
        "CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()"
    ).read()
    send_query("CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt").read()

    q = queue.Queue()
    pipe = send_http_query("WATCH test.lv")
    thread = threading.Thread(target=read_lines_and_push_to_queue, args=(pipe, q))
    thread.start()

    line = q.get()
    print(line)
    assert line == "0\t1"

    send_query("INSERT INTO test.mt VALUES (1),(2),(3)").read()
    line = q.get()
    print(line)
    assert line == "6\t2"

    send_query("DROP TABLE if exists test.lv").read()
    send_query("DROP TABLE if exists test.lv").read()

    thread.join()


test()
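
Both live view tests share the same reader pattern: a helper thread pumps lines from the child's pipe into a queue.Queue, and the main thread blocks on q.get() to assert the WATCH output in order. A self-contained sketch with a list standing in for the pipe:

import queue
import threading

q = queue.Queue()

def pump(lines, out):
    # Stands in for read_lines_and_push_to_queue: strip and forward each line.
    for line in lines:
        out.put(line.strip())

thread = threading.Thread(target=pump, args=(["0\t1\n", "6\t2\n"], q))
thread.start()
assert q.get() == "0\t1"  # first WATCH heartbeat
assert q.get() == "6\t2"  # after an INSERT of (1),(2),(3)
thread.join()
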
@@ -1,4 +1,4 @@
#!/usr/bin/env python3
import os
import sys
from scipy import stats
@@ -6,70 +6,86 @@ import pandas as pd
import numpy as np

CURDIR = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(0, os.path.join(CURDIR, "helpers"))

from pure_http_client import ClickHouseClient


def test_and_check(name, a, b, t_stat, p_value, precision=1e-2):
    client = ClickHouseClient()
    client.query("DROP TABLE IF EXISTS ttest;")
    client.query("CREATE TABLE ttest (left Float64, right UInt8) ENGINE = Memory;")
    client.query(
        "INSERT INTO ttest VALUES {};".format(
            ", ".join(["({},{})".format(i, 0) for i in a])
        )
    )
    client.query(
        "INSERT INTO ttest VALUES {};".format(
            ", ".join(["({},{})".format(j, 1) for j in b])
        )
    )
    real = client.query_return_df(
        "SELECT roundBankers({}(left, right).1, 16) as t_stat, ".format(name)
        + "roundBankers({}(left, right).2, 16) as p_value ".format(name)
        + "FROM ttest FORMAT TabSeparatedWithNames;"
    )
    real_t_stat = real["t_stat"][0]
    real_p_value = real["p_value"][0]
    assert (
        abs(real_t_stat - np.float64(t_stat)) < precision
    ), "clickhouse_t_stat {}, scipy_t_stat {}".format(real_t_stat, t_stat)
    assert (
        abs(real_p_value - np.float64(p_value)) < precision
    ), "clickhouse_p_value {}, scipy_p_value {}".format(real_p_value, p_value)
    client.query("DROP TABLE IF EXISTS ttest;")


def test_student():
    rvs1 = np.round(stats.norm.rvs(loc=1, scale=5, size=500), 2)
    rvs2 = np.round(stats.norm.rvs(loc=10, scale=5, size=500), 2)
    s, p = stats.ttest_ind(rvs1, rvs2, equal_var=True)
    test_and_check("studentTTest", rvs1, rvs2, s, p)

    rvs1 = np.round(stats.norm.rvs(loc=0, scale=5, size=500), 2)
    rvs2 = np.round(stats.norm.rvs(loc=0, scale=5, size=500), 2)
    s, p = stats.ttest_ind(rvs1, rvs2, equal_var=True)
    test_and_check("studentTTest", rvs1, rvs2, s, p)

    rvs1 = np.round(stats.norm.rvs(loc=2, scale=10, size=512), 2)
    rvs2 = np.round(stats.norm.rvs(loc=5, scale=20, size=1024), 2)
    s, p = stats.ttest_ind(rvs1, rvs2, equal_var=True)
    test_and_check("studentTTest", rvs1, rvs2, s, p)

    rvs1 = np.round(stats.norm.rvs(loc=0, scale=10, size=1024), 2)
    rvs2 = np.round(stats.norm.rvs(loc=0, scale=10, size=512), 2)
    s, p = stats.ttest_ind(rvs1, rvs2, equal_var=True)
    test_and_check("studentTTest", rvs1, rvs2, s, p)


def test_welch():
    rvs1 = np.round(stats.norm.rvs(loc=1, scale=15, size=500), 2)
    rvs2 = np.round(stats.norm.rvs(loc=10, scale=5, size=500), 2)
    s, p = stats.ttest_ind(rvs1, rvs2, equal_var=False)
    test_and_check("welchTTest", rvs1, rvs2, s, p)

    rvs1 = np.round(stats.norm.rvs(loc=0, scale=7, size=500), 2)
    rvs2 = np.round(stats.norm.rvs(loc=0, scale=3, size=500), 2)
    s, p = stats.ttest_ind(rvs1, rvs2, equal_var=False)
    test_and_check("welchTTest", rvs1, rvs2, s, p)

    rvs1 = np.round(stats.norm.rvs(loc=0, scale=10, size=1024), 2)
    rvs2 = np.round(stats.norm.rvs(loc=5, scale=1, size=512), 2)
    s, p = stats.ttest_ind(rvs1, rvs2, equal_var=False)
    test_and_check("welchTTest", rvs1, rvs2, s, p)

    rvs1 = np.round(stats.norm.rvs(loc=5, scale=10, size=512), 2)
    rvs2 = np.round(stats.norm.rvs(loc=5, scale=10, size=1024), 2)
    s, p = stats.ttest_ind(rvs1, rvs2, equal_var=False)
    test_and_check("welchTTest", rvs1, rvs2, s, p)


if __name__ == "__main__":
    test_student()
    test_welch()
    print("Ok.")
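
For reference, the values test_and_check compares against come straight from SciPy: the same (t, p) pair that studentTTest and welchTTest must reproduce within the 1e-2 precision above. A quick standalone check:

import numpy as np
from scipy import stats

a = np.round(stats.norm.rvs(loc=0, scale=5, size=500), 2)
b = np.round(stats.norm.rvs(loc=0, scale=5, size=500), 2)

t, p = stats.ttest_ind(a, b, equal_var=True)  # reference for studentTTest
tw, pw = stats.ttest_ind(a, b, equal_var=False)  # reference for welchTTest
print(t, p, tw, pw)
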
@@ -6,7 +6,7 @@ import pandas as pd
import numpy as np

CURDIR = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(0, os.path.join(CURDIR, "helpers"))

from pure_http_client import ClickHouseClient

@@ -14,40 +14,51 @@ from pure_http_client import ClickHouseClient
def test_and_check(name, a, b, t_stat, p_value):
    client = ClickHouseClient()
    client.query("DROP TABLE IF EXISTS mann_whitney;")
    client.query(
        "CREATE TABLE mann_whitney (left Float64, right UInt8) ENGINE = Memory;"
    )
    client.query(
        "INSERT INTO mann_whitney VALUES {};".format(
            ", ".join(["({},{}), ({},{})".format(i, 0, j, 1) for i, j in zip(a, b)])
        )
    )

    real = client.query_return_df(
        "SELECT roundBankers({}(left, right).1, 16) as t_stat, ".format(name)
        + "roundBankers({}(left, right).2, 16) as p_value ".format(name)
        + "FROM mann_whitney FORMAT TabSeparatedWithNames;"
    )
    real_t_stat = real["t_stat"][0]
    real_p_value = real["p_value"][0]
    assert (
        abs(real_t_stat - np.float64(t_stat)) < 1e-2
    ), "clickhouse_t_stat {}, scipy_t_stat {}".format(real_t_stat, t_stat)
    assert (
        abs(real_p_value - np.float64(p_value)) < 1e-2
    ), "clickhouse_p_value {}, scipy_p_value {}".format(real_p_value, p_value)
    client.query("DROP TABLE IF EXISTS mann_whitney;")


def test_mann_whitney():
    rvs1 = np.round(stats.norm.rvs(loc=1, scale=5, size=500), 5)
    rvs2 = np.round(stats.expon.rvs(scale=0.2, size=500), 5)
    s, p = stats.mannwhitneyu(rvs1, rvs2, alternative="two-sided")
    test_and_check("mannWhitneyUTest", rvs1, rvs2, s, p)
    test_and_check("mannWhitneyUTest('two-sided')", rvs1, rvs2, s, p)

    equal = np.round(stats.cauchy.rvs(scale=5, size=500), 5)
    s, p = stats.mannwhitneyu(equal, equal, alternative="two-sided")
    test_and_check("mannWhitneyUTest('two-sided')", equal, equal, s, p)

    s, p = stats.mannwhitneyu(equal, equal, alternative="less", use_continuity=False)
    test_and_check("mannWhitneyUTest('less', 0)", equal, equal, s, p)

    rvs1 = np.round(stats.cauchy.rvs(scale=10, size=65536), 5)
    rvs2 = np.round(stats.norm.rvs(loc=0, scale=10, size=65536), 5)
    s, p = stats.mannwhitneyu(rvs1, rvs2, alternative="greater")
    test_and_check("mannWhitneyUTest('greater')", rvs1, rvs2, s, p)


if __name__ == "__main__":
    test_mann_whitney()
    print("Ok.")
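
The parameter mapping exercised above, summarized: mannWhitneyUTest('<alternative>'[, <continuity>]) on the ClickHouse side is checked against scipy.stats.mannwhitneyu with the matching alternative= and use_continuity= arguments. For instance:

import numpy as np
from scipy import stats

x = np.round(stats.norm.rvs(loc=0, scale=1, size=100), 5)
y = np.round(stats.norm.rvs(loc=1, scale=1, size=100), 5)

# Reference for mannWhitneyUTest('greater') on the same two samples:
u, p = stats.mannwhitneyu(x, y, alternative="greater")
# Reference for mannWhitneyUTest('less', 0), i.e. no continuity correction:
u0, p0 = stats.mannwhitneyu(x, y, alternative="less", use_continuity=False)
print(u, p, u0, p0)
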
@@ -4,14 +4,18 @@ from random import randint, choices
import sys

CURDIR = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(0, os.path.join(CURDIR, "helpers"))

from pure_http_client import ClickHouseClient

client = ClickHouseClient()

N = 10
create_query = (
    "CREATE TABLE t_cnf_fuzz("
    + ", ".join([f"c{i} UInt8" for i in range(N)])
    + ") ENGINE = Memory"
)

client.query("DROP TABLE IF EXISTS t_cnf_fuzz")
client.query(create_query)
@@ -35,6 +39,7 @@ client.query(insert_query)
MAX_CLAUSES = 10
MAX_ATOMS = 5


def generate_dnf():
    clauses = []
    num_clauses = randint(1, MAX_CLAUSES)
@@ -42,12 +47,17 @@ def generate_dnf():
        num_atoms = randint(1, MAX_ATOMS)
        atom_ids = choices(range(N), k=num_atoms)
        negates = choices([0, 1], k=num_atoms)
        atoms = [
            f"(NOT c{i})" if neg else f"c{i}" for (i, neg) in zip(atom_ids, negates)
        ]
        clauses.append("(" + " AND ".join(atoms) + ")")

    return " OR ".join(clauses)


select_query = (
    "SELECT count() FROM t_cnf_fuzz WHERE {} SETTINGS convert_query_to_cnf = {}"
)

fail_report = """
Failed query: '{}'.
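
To make the fuzzed predicates concrete: generate_dnf() emits a random disjunction of conjunctions over the ten UInt8 columns, e.g. (c3 AND (NOT c7)) OR ((NOT c1) AND c4). A self-contained rendering follows; the outer loop over clauses is elided by the diff context above, so its exact shape here is an assumption:

from random import randint, choices

N, MAX_CLAUSES, MAX_ATOMS = 10, 10, 5

def generate_dnf():
    clauses = []
    num_clauses = randint(1, MAX_CLAUSES)
    for _ in range(num_clauses):  # assumed shape of the elided loop
        num_atoms = randint(1, MAX_ATOMS)
        atom_ids = choices(range(N), k=num_atoms)  # sampled with replacement
        negates = choices([0, 1], k=num_atoms)
        atoms = [f"(NOT c{i})" if neg else f"c{i}" for i, neg in zip(atom_ids, negates)]
        clauses.append("(" + " AND ".join(atoms) + ")")
    return " OR ".join(clauses)

print(generate_dnf())
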
@@ -5,15 +5,20 @@ import random
import string

CURDIR = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(0, os.path.join(CURDIR, "helpers"))

from pure_http_client import ClickHouseClient


def get_random_string(length):
    return "".join(
        random.choice(string.ascii_uppercase + string.digits) for _ in range(length)
    )


client = ClickHouseClient()


def insert_block(table_name, block_granularity_rows, block_rows):
    global client
    block_data = []
@@ -25,9 +30,12 @@ def insert_block(table_name, block_granularity_rows, block_rows):
    values_row = ", ".join("(1, '" + row + "')" for row in block_data)
    client.query("INSERT INTO {} VALUES {}".format(table_name, values_row))


try:
    client.query("DROP TABLE IF EXISTS t")
    client.query(
        "CREATE TABLE t (v UInt8, data String) ENGINE = MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0"
    )

    client.query("SYSTEM STOP MERGES t")

@@ -53,6 +61,10 @@ try:
    client.query("SYSTEM START MERGES t")
    client.query("OPTIMIZE TABLE t FINAL")

    print(
        client.query_return_df(
            "SELECT COUNT() as C FROM t FORMAT TabSeparatedWithNames"
        )["C"][0]
    )
finally:
    client.query("DROP TABLE IF EXISTS t")
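
Several of these scripts read results back through pure_http_client's query_return_df, which parses TabSeparatedWithNames output into a pandas DataFrame. The same thing done by hand, as a sketch (assumes a server on the default HTTP port 8123):

import io
import urllib.request

import pandas as pd

def query_df(sql, url="http://127.0.0.1:8123"):
    raw = urllib.request.urlopen(url, data=sql.encode()).read().decode()
    # TabSeparatedWithNames is TSV with a header row, so read_csv can parse it.
    return pd.read_csv(io.StringIO(raw), sep="\t")

# df = query_df("SELECT COUNT() as C FROM t FORMAT TabSeparatedWithNames")
# print(df["C"][0])
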
@@ -1,6 +1,6 @@
#!/usr/bin/env python3

from http.server import SimpleHTTPRequestHandler, HTTPServer
import socket
import csv
import sys
@@ -21,6 +21,7 @@ def is_ipv6(host):
    except:
        return True


def get_local_port(host, ipv6):
    if ipv6:
        family = socket.AF_INET6
@@ -31,8 +32,9 @@ def get_local_port(host, ipv6):
    fd.bind((host, 0))
    return fd.getsockname()[1]


CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "localhost")
CLICKHOUSE_PORT_HTTP = os.environ.get("CLICKHOUSE_PORT_HTTP", "8123")

#####################################################################################
# This test starts an HTTP server and serves data to clickhouse url-engine based table.
@@ -42,16 +44,24 @@ CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123')
#####################################################################################

# IP-address of this host accessible from the outside world. Get the first one
HTTP_SERVER_HOST = (
    subprocess.check_output(["hostname", "-i"]).decode("utf-8").strip().split()[0]
)
IS_IPV6 = is_ipv6(HTTP_SERVER_HOST)
HTTP_SERVER_PORT = get_local_port(HTTP_SERVER_HOST, IS_IPV6)

# IP address and port of the HTTP server started from this script.
HTTP_SERVER_ADDRESS = (HTTP_SERVER_HOST, HTTP_SERVER_PORT)
if IS_IPV6:
    HTTP_SERVER_URL_STR = (
        "http://"
        + f"[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}"
        + "/"
    )
else:
    HTTP_SERVER_URL_STR = (
        "http://" + f"{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}" + "/"
    )

# Because we need to check the content of file.csv, we create that content here and avoid reading a csv file
CSV_DATA = "Hello, 1\nWorld, 2\nThis, 152\nis, 9283\ntesting, 2313213\ndata, 555\n"
@@ -59,19 +69,24 @@ CSV_DATA = "Hello, 1\nWorld, 2\nThis, 152\nis, 9283\ntesting, 2313213\ndata, 555

# Choose compression method
# (Will change during test, need to check standard data sending, to make sure that nothing broke)
COMPRESS_METHOD = "none"
ADDING_ENDING = ""
ENDINGS = [".gz", ".xz"]
SEND_ENCODING = True


def get_ch_answer(query):
    host = CLICKHOUSE_HOST
    if IS_IPV6:
        host = f"[{host}]"

    url = os.environ.get(
        "CLICKHOUSE_URL",
        "http://{host}:{port}".format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP),
    )
    return urllib.request.urlopen(url, data=query.encode()).read().decode()


def check_answers(query, answer):
    ch_answer = get_ch_answer(query)
    if ch_answer.strip() != answer.strip():
@@ -80,18 +95,19 @@ def check_answers(query, answer):
        print("Fetched answer :", ch_answer, file=sys.stderr)
        raise Exception("Fail on query")


# Server with a HEAD method, which is useful for debugging by hand
class HttpProcessor(SimpleHTTPRequestHandler):
    def _set_headers(self):
        self.send_response(200)
        if SEND_ENCODING:
            self.send_header("Content-Encoding", COMPRESS_METHOD)
        if COMPRESS_METHOD == "none":
            self.send_header("Content-Length", len(CSV_DATA.encode()))
        else:
            self.compress_data()
            self.send_header("Content-Length", len(self.data))
        self.send_header("Content-Type", "text/csv")
        self.end_headers()

    def do_HEAD(self):
@@ -99,18 +115,17 @@ class HttpProcessor(SimpleHTTPRequestHandler):
        return

    def compress_data(self):
        if COMPRESS_METHOD == "gzip":
            self.data = gzip.compress((CSV_DATA).encode())
        elif COMPRESS_METHOD == "lzma":
            self.data = lzma.compress((CSV_DATA).encode())
        else:
            self.data = "WRONG CONVERSATION".encode()

    def do_GET(self):
        self._set_headers()

        if COMPRESS_METHOD == "none":
            self.wfile.write(CSV_DATA.encode())
        else:
            self.wfile.write(self.data)
@@ -119,9 +134,11 @@ class HttpProcessor(SimpleHTTPRequestHandler):
    def log_message(self, format, *args):
        return


class HTTPServerV6(HTTPServer):
    address_family = socket.AF_INET6


def start_server(requests_amount):
    if IS_IPV6:
        httpd = HTTPServerV6(HTTP_SERVER_ADDRESS, HttpProcessor)
@@ -135,52 +152,60 @@ def start_server(requests_amount):
    t = threading.Thread(target=real_func)
    return t


#####################################################################
# Testing area.
#####################################################################


def test_select(
    dict_name="",
    schema="word String, counter UInt32",
    requests=[],
    answers=[],
    test_data="",
):
    global ADDING_ENDING
    global SEND_ENCODING
    global COMPRESS_METHOD
    for i in range(len(requests)):
        if i > 2:
            ADDING_ENDING = ENDINGS[i - 3]
            SEND_ENCODING = False

        if dict_name:
            get_ch_answer("drop dictionary if exists {}".format(dict_name))
            get_ch_answer(
                """CREATE DICTIONARY {} ({})
PRIMARY KEY word
SOURCE(HTTP(url '{}' format 'CSV'))
LAYOUT(complex_key_hashed())
LIFETIME(0)""".format(
                    dict_name, schema, HTTP_SERVER_URL_STR + "/test.csv" + ADDING_ENDING
                )
            )

        COMPRESS_METHOD = requests[i]
        print(i, COMPRESS_METHOD, ADDING_ENDING, SEND_ENCODING)
        check_answers("SELECT * FROM {} ORDER BY word".format(dict_name), answers[i])


def main():
    # first three are for the encoding header, the last two for the url ending
    insert_requests = ["none", "gzip", "lzma", "gzip", "lzma"]

    # These answers were obtained experimentally in non-compressed mode and they are correct
    answers = ["""Hello 1\nThis 152\nWorld 2\ndata 555\nis 9283\ntesting 2313213"""] * 5

    t = start_server(len(insert_requests))
    t.start()
    test_select(
        dict_name="test_table_select", requests=insert_requests, answers=answers
    )
    t.join()
    print("PASSED")


if __name__ == "__main__":
    try:
        main()
@@ -191,5 +216,3 @@ if __name__ == "__main__":
        sys.stderr.flush()

        os._exit(1)
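
The point of the five passes in main() above: the first three vary the Content-Encoding header ('none', 'gzip', 'lzma'), the last two drop the header and rely on the .gz/.xz URL suffix instead, and the dictionary contents must come out identical every time. The server side of the gzip case, reduced to a sketch (host and port below are illustrative):

import gzip
from http.server import BaseHTTPRequestHandler, HTTPServer

PAYLOAD = b"Hello, 1\nWorld, 2\n"

class GzipHandler(BaseHTTPRequestHandler):
    def do_GET(self):
        body = gzip.compress(PAYLOAD)
        self.send_response(200)
        self.send_header("Content-Encoding", "gzip")  # client must auto-decompress
        self.send_header("Content-Length", str(len(body)))
        self.send_header("Content-Type", "text/csv")
        self.end_headers()
        self.wfile.write(body)

# HTTPServer(("127.0.0.1", 8080), GzipHandler).handle_request()  # serve one request
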
@@ -5,9 +5,10 @@ import socket
import os
import uuid

CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "127.0.0.1")
CLICKHOUSE_PORT = int(os.environ.get("CLICKHOUSE_PORT_TCP", "900000"))
CLICKHOUSE_DATABASE = os.environ.get("CLICKHOUSE_DATABASE", "default")


def writeVarUInt(x, ba):
    for _ in range(0, 9):
@@ -24,12 +25,12 @@ def writeVarUInt(x, ba):


def writeStringBinary(s, ba):
    b = bytes(s, "utf-8")
    writeVarUInt(len(s), ba)
    ba.extend(b)


def readStrict(s, size=1):
    res = bytearray()
    while size:
        cur = s.recv(size)
@@ -48,18 +49,23 @@ def readUInt(s, size=1):
        val += res[i] << (i * 8)
    return val


def readUInt8(s):
    return readUInt(s)


def readUInt16(s):
    return readUInt(s, 2)


def readUInt32(s):
    return readUInt(s, 4)


def readUInt64(s):
    return readUInt(s, 8)


def readVarUInt(s):
    x = 0
    for i in range(9):
@@ -75,25 +81,25 @@ def readVarUInt(s):
def readStringBinary(s):
    size = readVarUInt(s)
    s = readStrict(s, size)
    return s.decode("utf-8")


def sendHello(s):
    ba = bytearray()
    writeVarUInt(0, ba)  # Hello
    writeStringBinary("simple native protocol", ba)
    writeVarUInt(21, ba)
    writeVarUInt(9, ba)
    writeVarUInt(54449, ba)
    writeStringBinary("default", ba)  # database
    writeStringBinary("default", ba)  # user
    writeStringBinary("", ba)  # pwd
    s.sendall(ba)


def receiveHello(s):
    p_type = readVarUInt(s)
    assert p_type == 0  # Hello
    server_name = readStringBinary(s)
    # print("Server name: ", server_name)
    server_version_major = readVarUInt(s)
@@ -111,78 +117,79 @@ def receiveHello(s):


def serializeClientInfo(ba, query_id):
    writeStringBinary("default", ba)  # initial_user
    writeStringBinary(query_id, ba)  # initial_query_id
    writeStringBinary("127.0.0.1:9000", ba)  # initial_address
    ba.extend([0] * 8)  # initial_query_start_time_microseconds
    ba.append(1)  # TCP
    writeStringBinary("os_user", ba)  # os_user
    writeStringBinary("client_hostname", ba)  # client_hostname
    writeStringBinary("client_name", ba)  # client_name
    writeVarUInt(21, ba)
    writeVarUInt(9, ba)
    writeVarUInt(54449, ba)
    writeStringBinary("", ba)  # quota_key
    writeVarUInt(0, ba)  # distributed_depth
    writeVarUInt(1, ba)  # client_version_patch
    ba.append(0)  # No telemetry


def sendQuery(s, query):
    ba = bytearray()
    query_id = uuid.uuid4().hex
    writeVarUInt(1, ba)  # query
    writeStringBinary(query_id, ba)

    ba.append(1)  # INITIAL_QUERY

    # client info
    serializeClientInfo(ba, query_id)

    writeStringBinary("", ba)  # No settings
    writeStringBinary("", ba)  # No interserver secret
    writeVarUInt(2, ba)  # Stage - Complete
    ba.append(0)  # No compression
    writeStringBinary(query, ba)  # query, finally
    s.sendall(ba)


def serializeBlockInfo(ba):
    writeVarUInt(1, ba)  # 1
    ba.append(0)  # is_overflows
    writeVarUInt(2, ba)  # 2
    writeVarUInt(0, ba)  # 0
    ba.extend([0] * 4)  # bucket_num


def sendEmptyBlock(s):
    ba = bytearray()
    writeVarUInt(2, ba)  # Data
    writeStringBinary("", ba)
    serializeBlockInfo(ba)
    writeVarUInt(0, ba)  # rows
    writeVarUInt(0, ba)  # columns
    s.sendall(ba)


def assertPacket(packet, expected):
    assert packet == expected, packet


def readHeader(s):
    packet_type = readVarUInt(s)
    if packet_type == 2:  # Exception
        raise RuntimeError(readException(s))
    assertPacket(packet_type, 1)  # Data

    readStringBinary(s)  # external table name
    # BlockInfo
    assertPacket(readVarUInt(s), 1)  # 1
    assertPacket(readUInt8(s), 0)  # is_overflows
    assertPacket(readVarUInt(s), 2)  # 2
    assertPacket(readUInt32(s), 4294967295)  # bucket_num
    assertPacket(readVarUInt(s), 0)  # 0
    columns = readVarUInt(s)
    rows = readVarUInt(s)
    print("Rows {} Columns {}".format(rows, columns))
    for _ in range(columns):
        col_name = readStringBinary(s)
@@ -194,9 +201,9 @@ def readException(s):
    code = readUInt32(s)
    name = readStringBinary(s)
    text = readStringBinary(s)
    readStringBinary(s)  # trace
    assertPacket(readUInt8(s), 0)  # has_nested
    return "code {}: {}".format(code, text.replace("DB::Exception:", ""))


def insertValidLowCardinalityRow():
@@ -205,7 +212,12 @@ def insertValidLowCardinalityRow():
        s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT))
        sendHello(s)
        receiveHello(s)
        sendQuery(
            s,
            "insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV".format(
                CLICKHOUSE_DATABASE
            ),
        )

        # external tables
        sendEmptyBlock(s)
@@ -213,25 +225,27 @@ def insertValidLowCardinalityRow():

        # Data
        ba = bytearray()
        writeVarUInt(2, ba)  # Data
        writeStringBinary("", ba)
        serializeBlockInfo(ba)
        writeVarUInt(1, ba)  # rows
        writeVarUInt(1, ba)  # columns
        writeStringBinary("x", ba)
        writeStringBinary("LowCardinality(String)", ba)
        ba.extend([1] + [0] * 7)  # SharedDictionariesWithAdditionalKeys
        ba.extend(
            [3, 2] + [0] * 6
        )  # indexes type: UInt64 [3], with additional keys [2]
        ba.extend([1] + [0] * 7)  # num_keys in dict
        writeStringBinary("hello", ba)  # key
        ba.extend([1] + [0] * 7)  # num_indexes
        ba.extend([0] * 8)  # UInt64 index (0 for 'hello')
        s.sendall(ba)

        # Fin block
        sendEmptyBlock(s)

        assertPacket(readVarUInt(s), 5)  # End of stream
        s.close()


@@ -241,7 +255,12 @@ def insertLowCardinalityRowWithIndexOverflow():
        s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT))
        sendHello(s)
        receiveHello(s)
        sendQuery(
            s,
            "insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV".format(
                CLICKHOUSE_DATABASE
            ),
        )

        # external tables
        sendEmptyBlock(s)
@@ -249,19 +268,21 @@ def insertLowCardinalityRowWithIndexOverflow():

        # Data
        ba = bytearray()
        writeVarUInt(2, ba)  # Data
        writeStringBinary("", ba)
        serializeBlockInfo(ba)
        writeVarUInt(1, ba)  # rows
        writeVarUInt(1, ba)  # columns
        writeStringBinary("x", ba)
        writeStringBinary("LowCardinality(String)", ba)
        ba.extend([1] + [0] * 7)  # SharedDictionariesWithAdditionalKeys
        ba.extend(
            [3, 2] + [0] * 6
        )  # indexes type: UInt64 [3], with additional keys [2]
        ba.extend([1] + [0] * 7)  # num_keys in dict
        writeStringBinary("hello", ba)  # key
        ba.extend([1] + [0] * 7)  # num_indexes
        ba.extend([0] * 7 + [1])  # UInt64 index (overflow)
        s.sendall(ba)

        assertPacket(readVarUInt(s), 2)
@@ -275,7 +296,12 @@ def insertLowCardinalityRowWithIncorrectDictType():
        s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT))
        sendHello(s)
        receiveHello(s)
        sendQuery(
            s,
            "insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV".format(
                CLICKHOUSE_DATABASE
            ),
        )

        # external tables
        sendEmptyBlock(s)
@@ -283,32 +309,40 @@ def insertLowCardinalityRowWithIncorrectDictType():

        # Data
        ba = bytearray()
        writeVarUInt(2, ba)  # Data
        writeStringBinary("", ba)
        serializeBlockInfo(ba)
        writeVarUInt(1, ba)  # rows
        writeVarUInt(1, ba)  # columns
        writeStringBinary("x", ba)
        writeStringBinary("LowCardinality(String)", ba)
        ba.extend([1] + [0] * 7)  # SharedDictionariesWithAdditionalKeys
        ba.extend(
            [3, 3] + [0] * 6
        )  # indexes type: UInt64 [3], with global dict and add keys [1 + 2]
        ba.extend([1] + [0] * 7)  # num_keys in dict
        writeStringBinary("hello", ba)  # key
        ba.extend([1] + [0] * 7)  # num_indexes
        ba.extend([0] * 8)  # UInt64 index (0 for 'hello')
        s.sendall(ba)

        assertPacket(readVarUInt(s), 2)
        print(readException(s))
        s.close()


def insertLowCardinalityRowWithIncorrectAdditionalKeys():
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.settimeout(30)
        s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT))
        sendHello(s)
        receiveHello(s)
        sendQuery(
            s,
            "insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV".format(
                CLICKHOUSE_DATABASE
            ),
        )

        # external tables
        sendEmptyBlock(s)
@@ -316,30 +350,34 @@ def insertLowCardinalityRowWithIncorrectAdditionalKeys():

        # Data
        ba = bytearray()
        writeVarUInt(2, ba)  # Data
        writeStringBinary("", ba)
        serializeBlockInfo(ba)
        writeVarUInt(1, ba)  # rows
        writeVarUInt(1, ba)  # columns
        writeStringBinary("x", ba)
        writeStringBinary("LowCardinality(String)", ba)
        ba.extend([1] + [0] * 7)  # SharedDictionariesWithAdditionalKeys
        ba.extend(
            [3, 0] + [0] * 6
        )  # indexes type: UInt64 [3], with NO additional keys [0]
        ba.extend([1] + [0] * 7)  # num_keys in dict
        writeStringBinary("hello", ba)  # key
        ba.extend([1] + [0] * 7)  # num_indexes
        ba.extend([0] * 8)  # UInt64 index (0 for 'hello')
        s.sendall(ba)

        assertPacket(readVarUInt(s), 2)
        print(readException(s))
        s.close()


def main():
    insertValidLowCardinalityRow()
    insertLowCardinalityRowWithIndexOverflow()
    insertLowCardinalityRowWithIncorrectDictType()
    insertLowCardinalityRowWithIncorrectAdditionalKeys()


if __name__ == "__main__":
    main()
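
The byte-level primitive this whole script builds on: native protocol integers are LEB128-style varints, seven payload bits per byte with the high bit as a continuation flag. A complete round of the writer, reconstructed as a sketch from the visible loop header rather than copied from the file:

def write_var_uint(x, ba):
    for _ in range(9):
        byte = x & 0x7F  # low seven bits
        x >>= 7
        if x:
            byte |= 0x80  # more bytes follow
        ba.append(byte)
        if not x:
            break

ba = bytearray()
write_var_uint(54449, ba)  # the protocol revision sent in sendHello
assert ba == bytearray([0xB1, 0xA9, 0x03])
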
@ -12,6 +12,7 @@ import subprocess
|
|||||||
from io import StringIO
|
from io import StringIO
|
||||||
from http.server import BaseHTTPRequestHandler, HTTPServer
|
from http.server import BaseHTTPRequestHandler, HTTPServer
|
||||||
|
|
||||||
|
|
||||||
def is_ipv6(host):
|
def is_ipv6(host):
|
||||||
try:
|
try:
|
||||||
socket.inet_aton(host)
|
socket.inet_aton(host)
|
||||||
@ -19,6 +20,7 @@ def is_ipv6(host):
|
|||||||
except:
|
except:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def get_local_port(host, ipv6):
|
def get_local_port(host, ipv6):
|
||||||
if ipv6:
|
if ipv6:
|
||||||
family = socket.AF_INET6
|
family = socket.AF_INET6
|
||||||
@ -29,8 +31,9 @@ def get_local_port(host, ipv6):
|
|||||||
    fd.bind((host, 0))
    return fd.getsockname()[1]


CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "127.0.0.1")
CLICKHOUSE_PORT_HTTP = os.environ.get("CLICKHOUSE_PORT_HTTP", "8123")

#####################################################################################
# This test starts an HTTP server and serves data to clickhouse url-engine based table.
@@ -39,27 +42,42 @@ CLICKHOUSE_PORT_HTTP = os.environ.get("CLICKHOUSE_PORT_HTTP", "8123")
#####################################################################################

# IP-address of this host accessible from the outside world. Get the first one
HTTP_SERVER_HOST = (
    subprocess.check_output(["hostname", "-i"]).decode("utf-8").strip().split()[0]
)
IS_IPV6 = is_ipv6(HTTP_SERVER_HOST)
HTTP_SERVER_PORT = get_local_port(HTTP_SERVER_HOST, IS_IPV6)

# IP address and port of the HTTP server started from this script.
HTTP_SERVER_ADDRESS = (HTTP_SERVER_HOST, HTTP_SERVER_PORT)
if IS_IPV6:
    HTTP_SERVER_URL_STR = (
        "http://"
        + f"[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}"
        + "/"
    )
else:
    HTTP_SERVER_URL_STR = (
        "http://" + f"{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}" + "/"
    )

CSV_DATA = os.path.join(
    tempfile._get_default_tempdir(), next(tempfile._get_candidate_names())
)


def get_ch_answer(query):
    host = CLICKHOUSE_HOST
    if IS_IPV6:
        host = f"[{host}]"

    url = os.environ.get(
        "CLICKHOUSE_URL",
        "http://{host}:{port}".format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP),
    )
    return urllib.request.urlopen(url, data=query.encode()).read().decode()


def check_answers(query, answer):
    ch_answer = get_ch_answer(query)
    if ch_answer.strip() != answer.strip():
@@ -68,15 +86,16 @@ def check_answers(query, answer):
print("Fetched answer :", ch_answer, file=sys.stderr)
|
print("Fetched answer :", ch_answer, file=sys.stderr)
|
||||||
raise Exception("Fail on query")
|
raise Exception("Fail on query")
|
||||||
|
|
||||||
|
|
||||||
class CSVHTTPServer(BaseHTTPRequestHandler):
|
class CSVHTTPServer(BaseHTTPRequestHandler):
|
||||||
def _set_headers(self):
|
def _set_headers(self):
|
||||||
self.send_response(200)
|
self.send_response(200)
|
||||||
self.send_header('Content-type', 'text/csv')
|
self.send_header("Content-type", "text/csv")
|
||||||
self.end_headers()
|
self.end_headers()
|
||||||
|
|
||||||
def do_GET(self):
|
def do_GET(self):
|
||||||
self._set_headers()
|
self._set_headers()
|
||||||
self.wfile.write(('hello, world').encode())
|
self.wfile.write(("hello, world").encode())
|
||||||
# with open(CSV_DATA, 'r') as fl:
|
# with open(CSV_DATA, 'r') as fl:
|
||||||
# reader = csv.reader(fl, delimiter=',')
|
# reader = csv.reader(fl, delimiter=',')
|
||||||
# for row in reader:
|
# for row in reader:
|
||||||
@ -84,33 +103,33 @@ class CSVHTTPServer(BaseHTTPRequestHandler):
|
|||||||
        return

    def read_chunk(self):
        msg = ""
        while True:
            sym = self.rfile.read(1)
            if sym == b"":  # rfile yields bytes; comparing against "" would never match
                break
            msg += sym.decode("utf-8")
            if msg.endswith("\r\n"):
                break
        length = int(msg[:-2], 16)
        if length == 0:
            return ""
        content = self.rfile.read(length)
        self.rfile.read(2)  # read sep \r\n
        return content.decode("utf-8")

    def do_POST(self):
        data = ""
        while True:
            chunk = self.read_chunk()
            if not chunk:
                break
            data += chunk
        with StringIO(data) as fl:
            reader = csv.reader(fl, delimiter=",")
            with open(CSV_DATA, "a") as d:
                for row in reader:
                    d.write(",".join(row) + "\n")
        self._set_headers()
        self.wfile.write(b"ok")
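read_chunk above hand-decodes HTTP/1.1 chunked transfer encoding: a hex length line terminated by \r\n, that many payload bytes, a trailing \r\n, and a zero-length chunk to finish. For reference, the encoding side looks like this (a sketch, not part of the commit):

def encode_chunked(payloads):
    # Frame each payload as <hex length>\r\n<bytes>\r\n, then the terminating 0-chunk.
    body = b""
    for p in payloads:
        body += format(len(p), "x").encode() + b"\r\n" + p + b"\r\n"
    return body + b"0\r\n\r\n"

assert encode_chunked([b"1,2\n"]) == b"4\r\n1,2\n\r\n0\r\n\r\n"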
@@ -121,6 +140,7 @@ class CSVHTTPServer(BaseHTTPRequestHandler):
class HTTPServerV6(HTTPServer):
    address_family = socket.AF_INET6


def start_server():
    if IS_IPV6:
        httpd = HTTPServerV6(HTTP_SERVER_ADDRESS, CSVHTTPServer)
@@ -130,57 +150,87 @@ def start_server():
    t = threading.Thread(target=httpd.serve_forever)
    return t, httpd


# test section


def test_select(
    table_name="",
    schema="str String,numuint UInt32,numint Int32,double Float64",
    requests=[],
    answers=[],
    test_data="",
):
    with open(CSV_DATA, "w") as f:  # clear file
        f.write("")

    if test_data:
        with open(CSV_DATA, "w") as f:
            f.write(test_data + "\n")

    if table_name:
        get_ch_answer("drop table if exists {}".format(table_name))
        get_ch_answer(
            "create table {} ({}) engine=URL('{}', 'CSV')".format(
                table_name, schema, HTTP_SERVER_URL_STR
            )
        )

    for i in range(len(requests)):
        tbl = table_name
        if not tbl:
            tbl = "url('{addr}', 'CSV', '{schema}')".format(
                addr=HTTP_SERVER_URL_STR, schema=schema
            )
        check_answers(requests[i].format(tbl=tbl), answers[i])

    if table_name:
        get_ch_answer("drop table if exists {}".format(table_name))


def test_insert(
    table_name="",
    schema="str String,numuint UInt32,numint Int32,double Float64",
    requests_insert=[],
    requests_select=[],
    answers=[],
):
    with open(CSV_DATA, "w") as f:  # flush test file
        f.write("")

    if table_name:
        get_ch_answer("drop table if exists {}".format(table_name))
        get_ch_answer(
            "create table {} ({}) engine=URL('{}', 'CSV')".format(
                table_name, schema, HTTP_SERVER_URL_STR
            )
        )

    for req in requests_insert:
        tbl = table_name
        if not tbl:
            tbl = "table function url('{addr}', 'CSV', '{schema}')".format(
                addr=HTTP_SERVER_URL_STR, schema=schema
            )
        get_ch_answer(req.format(tbl=tbl))

    for i in range(len(requests_select)):
        tbl = table_name
        if not tbl:
            tbl = "url('{addr}', 'CSV', '{schema}')".format(
                addr=HTTP_SERVER_URL_STR, schema=schema
            )
        check_answers(requests_select[i].format(tbl=tbl), answers[i])

    if table_name:
        get_ch_answer("drop table if exists {}".format(table_name))


def test_select_url_engine(requests=[], answers=[], test_data=""):
    for i in range(len(requests)):
        check_answers(requests[i], answers[i])


def main():
    test_data = "Hello,2,-2,7.7\nWorld,2,-5,8.8"
    """
@@ -203,19 +253,29 @@
"""
|
"""
|
||||||
|
|
||||||
if IS_IPV6:
|
if IS_IPV6:
|
||||||
query = "select * from url('http://guest:guest@" + f'[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}' + "/', 'RawBLOB', 'a String')"
|
query = (
|
||||||
|
"select * from url('http://guest:guest@"
|
||||||
|
+ f"[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}"
|
||||||
|
+ "/', 'RawBLOB', 'a String')"
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
query = "select * from url('http://guest:guest@" + f'{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}' + "/', 'RawBLOB', 'a String')"
|
query = (
|
||||||
|
"select * from url('http://guest:guest@"
|
||||||
|
+ f"{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}"
|
||||||
|
+ "/', 'RawBLOB', 'a String')"
|
||||||
|
)
|
||||||
|
|
||||||
select_requests_url_auth = {
|
select_requests_url_auth = {
|
||||||
query : 'hello, world',
|
query: "hello, world",
|
||||||
}
|
}
|
||||||
|
|
||||||
t, httpd = start_server()
|
t, httpd = start_server()
|
||||||
t.start()
|
t.start()
|
||||||
test_select(requests=list(select_requests_url_auth.keys()), answers=list(select_requests_url_auth.values()), test_data=test_data)
|
test_select(
|
||||||
|
requests=list(select_requests_url_auth.keys()),
|
||||||
|
answers=list(select_requests_url_auth.values()),
|
||||||
|
test_data=test_data,
|
||||||
|
)
|
||||||
httpd.shutdown()
|
httpd.shutdown()
|
||||||
t.join()
|
t.join()
|
||||||
print("PASSED")
|
print("PASSED")
|
||||||
|
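For context on what this file exercises: a URL-engine table turns every SELECT into an HTTP GET and every INSERT into an HTTP POST against the given endpoint. A hedged usage sketch reusing this file's own helper (table and column names are illustrative):

get_ch_answer(
    "create table test_url (s String, n UInt32) "
    "engine=URL('{}', 'CSV')".format(HTTP_SERVER_URL_STR)
)
get_ch_answer("insert into test_url values ('a', 1)")  # POSTs CSV to the server
print(get_ch_answer("select * from test_url"))         # GETs CSV back
get_ch_answer("drop table if exists test_url")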
@@ -1,4 +1,4 @@
#!/usr/bin/env python3
import os
import sys
from math import sqrt, nan
@@ -8,7 +8,7 @@ import pandas as pd
import numpy as np

CURDIR = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(0, os.path.join(CURDIR, "helpers"))

from pure_http_client import ClickHouseClient

@@ -25,7 +25,7 @@ def twosample_proportion_ztest(s1, s2, t1, t2, alpha):
        return nan, nan, nan, nan
    z_stat = (p1 - p2) / se

    one_side = 1 - stats.norm.cdf(abs(z_stat))
    p_value = one_side * 2

    z = stats.norm.ppf(1 - 0.5 * alpha)
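The hunk above shows only fragments of twosample_proportion_ztest. For orientation, the unpooled two-proportion z statistic it computes is z = (p1 - p2) / sqrt(p1(1-p1)/t1 + p2(1-p2)/t2). A sketch of the full reference computation consistent with the visible fragments (the elided body may differ in detail; assumes this file's scipy `stats` import):

def ztest_sketch(s1, s2, t1, t2, alpha):
    p1, p2 = s1 / t1, s2 / t2
    se = sqrt(p1 * (1 - p1) / t1 + p2 * (1 - p2) / t2)  # unpooled standard error
    if se == 0:
        return nan, nan, nan, nan
    z_stat = (p1 - p2) / se
    p_value = 2 * (1 - stats.norm.cdf(abs(z_stat)))  # two-sided
    z = stats.norm.ppf(1 - 0.5 * alpha)
    return z_stat, p_value, (p1 - p2) - z * se, (p1 - p2) + z * se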
@@ -38,71 +38,171 @@ def twosample_proportion_ztest(s1, s2, t1, t2, alpha):
def test_and_check(name, z_stat, p_value, ci_lower, ci_upper, precision=1e-2):
    client = ClickHouseClient()
    real = client.query_return_df(
        "SELECT roundBankers({}.1, 16) as z_stat, ".format(name)
        + "roundBankers({}.2, 16) as p_value, ".format(name)
        + "roundBankers({}.3, 16) as ci_lower, ".format(name)
        + "roundBankers({}.4, 16) as ci_upper ".format(name)
        + "FORMAT TabSeparatedWithNames;"
    )
    real_z_stat = real["z_stat"][0]
    real_p_value = real["p_value"][0]
    real_ci_lower = real["ci_lower"][0]
    real_ci_upper = real["ci_upper"][0]
    assert (np.isnan(real_z_stat) and np.isnan(z_stat)) or abs(
        real_z_stat - np.float64(z_stat)
    ) < precision, "clickhouse_z_stat {}, py_z_stat {}".format(real_z_stat, z_stat)
    assert (np.isnan(real_p_value) and np.isnan(p_value)) or abs(
        real_p_value - np.float64(p_value)
    ) < precision, "clickhouse_p_value {}, py_p_value {}".format(real_p_value, p_value)
    assert (np.isnan(real_ci_lower) and np.isnan(ci_lower)) or abs(
        real_ci_lower - np.float64(ci_lower)
    ) < precision, "clickhouse_ci_lower {}, py_ci_lower {}".format(
        real_ci_lower, ci_lower
    )
    assert (np.isnan(real_ci_upper) and np.isnan(ci_upper)) or abs(
        real_ci_upper - np.float64(ci_upper)
    ) < precision, "clickhouse_ci_upper {}, py_ci_upper {}".format(
        real_ci_upper, ci_upper
    )


def test_mean_ztest():
    counts = [0, 0]
    nobs = [0, 0]
    z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(
        counts[0], counts[1], nobs[0], nobs[1], 0.05
    )
    test_and_check(
        "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')"
        % (counts[0], counts[1], nobs[0], nobs[1]),
        z_stat,
        p_value,
        ci_lower,
        ci_upper,
    )
    z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(
        10, 10, 10, 10, 0.05
    )

    counts = [10, 10]
    nobs = [10, 10]
    z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(
        counts[0], counts[1], nobs[0], nobs[1], 0.05
    )
    test_and_check(
        "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')"
        % (counts[0], counts[1], nobs[0], nobs[1]),
        z_stat,
        p_value,
        ci_lower,
        ci_upper,
    )
    z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(
        10, 10, 10, 10, 0.05
    )

    counts = [16, 16]
    nobs = [16, 18]
    z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(
        counts[0], counts[1], nobs[0], nobs[1], 0.05
    )
    test_and_check(
        "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')"
        % (counts[0], counts[1], nobs[0], nobs[1]),
        z_stat,
        p_value,
        ci_lower,
        ci_upper,
    )

    counts = [10, 20]
    nobs = [30, 40]
    z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(
        counts[0], counts[1], nobs[0], nobs[1], 0.05
    )
    test_and_check(
        "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')"
        % (counts[0], counts[1], nobs[0], nobs[1]),
        z_stat,
        p_value,
        ci_lower,
        ci_upper,
    )

    counts = [20, 10]
    nobs = [40, 30]
    z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(
        counts[0], counts[1], nobs[0], nobs[1], 0.05
    )
    test_and_check(
        "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')"
        % (counts[0], counts[1], nobs[0], nobs[1]),
        z_stat,
        p_value,
        ci_lower,
        ci_upper,
    )

    counts = [randrange(10, 20), randrange(10, 20)]
    nobs = [
        randrange(counts[0] + 1, counts[0] * 2),
        randrange(counts[1], counts[1] * 2),
    ]
    z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(
        counts[0], counts[1], nobs[0], nobs[1], 0.05
    )
    test_and_check(
        "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')"
        % (counts[0], counts[1], nobs[0], nobs[1]),
        z_stat,
        p_value,
        ci_lower,
        ci_upper,
    )

    counts = [randrange(1, 100), randrange(1, 200)]
    nobs = [randrange(counts[0], counts[0] * 2), randrange(counts[1], counts[1] * 3)]
    z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(
        counts[0], counts[1], nobs[0], nobs[1], 0.05
    )
    test_and_check(
        "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')"
        % (counts[0], counts[1], nobs[0], nobs[1]),
        z_stat,
        p_value,
        ci_lower,
        ci_upper,
    )

    counts = [randrange(1, 200), randrange(1, 100)]
    nobs = [randrange(counts[0], counts[0] * 3), randrange(counts[1], counts[1] * 2)]
    z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(
        counts[0], counts[1], nobs[0], nobs[1], 0.05
    )
    test_and_check(
        "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')"
        % (counts[0], counts[1], nobs[0], nobs[1]),
        z_stat,
        p_value,
        ci_lower,
        ci_upper,
    )

    counts = [randrange(1, 1000), randrange(1, 1000)]
    nobs = [randrange(counts[0], counts[0] * 2), randrange(counts[1], counts[1] * 2)]
    z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(
        counts[0], counts[1], nobs[0], nobs[1], 0.05
    )
    test_and_check(
        "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')"
        % (counts[0], counts[1], nobs[0], nobs[1]),
        z_stat,
        p_value,
        ci_lower,
        ci_upper,
    )


if __name__ == "__main__":
    test_mean_ztest()
    print("Ok.")
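What the generated queries boil down to on the server side (a usage sketch, not part of the commit; arguments are successes s1, s2 then totals t1, t2, then confidence level and pooling mode):

client = ClickHouseClient()
print(client.query("SELECT proportionsZTest(10, 20, 30, 40, 0.95, 'unpooled')"))
# -> one tuple: (z_stat, p_value, ci_lower, ci_upper)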
@@ -1,4 +1,4 @@
#!/usr/bin/env python3
import os
import sys
from statistics import variance
@@ -7,7 +7,7 @@ import pandas as pd
import numpy as np

CURDIR = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(0, os.path.join(CURDIR, "helpers"))

from pure_http_client import ClickHouseClient

@@ -30,46 +30,95 @@ def twosample_mean_ztest(rvs1, rvs2, alpha=0.05):
def test_and_check(name, a, b, t_stat, p_value, ci_low, ci_high, precision=1e-2):
    client = ClickHouseClient()
    client.query("DROP TABLE IF EXISTS ztest;")
    client.query("CREATE TABLE ztest (left Float64, right UInt8) ENGINE = Memory;")
    client.query(
        "INSERT INTO ztest VALUES {};".format(
            ", ".join(["({},{})".format(i, 0) for i in a])
        )
    )
    client.query(
        "INSERT INTO ztest VALUES {};".format(
            ", ".join(["({},{})".format(j, 1) for j in b])
        )
    )
    real = client.query_return_df(
        "SELECT roundBankers({}(left, right).1, 16) as t_stat, ".format(name)
        + "roundBankers({}(left, right).2, 16) as p_value, ".format(name)
        + "roundBankers({}(left, right).3, 16) as ci_low, ".format(name)
        + "roundBankers({}(left, right).4, 16) as ci_high ".format(name)
        + "FROM ztest FORMAT TabSeparatedWithNames;"
    )
    real_t_stat = real["t_stat"][0]
    real_p_value = real["p_value"][0]
    real_ci_low = real["ci_low"][0]
    real_ci_high = real["ci_high"][0]
    assert (
        abs(real_t_stat - np.float64(t_stat)) < precision
    ), "clickhouse_t_stat {}, py_t_stat {}".format(real_t_stat, t_stat)
    assert (
        abs(real_p_value - np.float64(p_value)) < precision
    ), "clickhouse_p_value {}, py_p_value {}".format(real_p_value, p_value)
    assert (
        abs(real_ci_low - np.float64(ci_low)) < precision
    ), "clickhouse_ci_low {}, py_ci_low {}".format(real_ci_low, ci_low)
    assert (
        abs(real_ci_high - np.float64(ci_high)) < precision
    ), "clickhouse_ci_high {}, py_ci_high {}".format(real_ci_high, ci_high)
    client.query("DROP TABLE IF EXISTS ztest;")


def test_mean_ztest():
    rvs1 = np.round(stats.norm.rvs(loc=1, scale=5, size=500), 2)
    rvs2 = np.round(stats.norm.rvs(loc=10, scale=5, size=500), 2)
    s, p, cl, ch = twosample_mean_ztest(rvs1, rvs2)
    test_and_check(
        "meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)),
        rvs1,
        rvs2,
        s,
        p,
        cl,
        ch,
    )

    rvs1 = np.round(stats.norm.rvs(loc=0, scale=5, size=500), 2)
    rvs2 = np.round(stats.norm.rvs(loc=0, scale=5, size=500), 2)
    s, p, cl, ch = twosample_mean_ztest(rvs1, rvs2)
    test_and_check(
        "meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)),
        rvs1,
        rvs2,
        s,
        p,
        cl,
        ch,
    )

    rvs1 = np.round(stats.norm.rvs(loc=2, scale=10, size=512), 2)
    rvs2 = np.round(stats.norm.rvs(loc=5, scale=20, size=1024), 2)
    s, p, cl, ch = twosample_mean_ztest(rvs1, rvs2)
    test_and_check(
        "meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)),
        rvs1,
        rvs2,
        s,
        p,
        cl,
        ch,
    )

    rvs1 = np.round(stats.norm.rvs(loc=0, scale=10, size=1024), 2)
    rvs2 = np.round(stats.norm.rvs(loc=0, scale=10, size=512), 2)
    s, p, cl, ch = twosample_mean_ztest(rvs1, rvs2)
    test_and_check(
        "meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)),
        rvs1,
        rvs2,
        s,
        p,
        cl,
        ch,
    )


if __name__ == "__main__":
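For readers unfamiliar with the function under test: meanZTest is parameterized with the two known population variances and a confidence level, then applied to the (value, group) columns. A usage sketch under those assumptions (variances 2.0/2.0 are illustrative; table ztest as created above):

client = ClickHouseClient()
df = client.query_return_df(
    "SELECT meanZTest(2.0, 2.0, 0.95)(left, right) AS r "
    "FROM ztest FORMAT TabSeparatedWithNames;"
)
# r is a tuple: (t_stat, p_value, ci_low, ci_high)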
@@ -3,47 +3,71 @@ import os
import sys

CURDIR = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(0, os.path.join(CURDIR, "helpers"))

CLICKHOUSE_URL = os.environ.get("CLICKHOUSE_URL")
CLICKHOUSE_TMP = os.environ.get("CLICKHOUSE_TMP")

from pure_http_client import ClickHouseClient

client = ClickHouseClient()


def run_test(data_format, gen_data_template, settings):
    print(data_format)
    client.query("TRUNCATE TABLE t_async_insert")

    expected = client.query(gen_data_template.format("TSV")).strip()
    data = client.query(
        gen_data_template.format(data_format), settings=settings, binary_result=True
    )

    insert_query = "INSERT INTO t_async_insert FORMAT {}".format(data_format)
    client.query_with_data(insert_query, data, settings=settings)

    result = client.query("SELECT * FROM t_async_insert FORMAT TSV").strip()
    if result != expected:
        print(
            "Failed for format {}.\nExpected:\n{}\nGot:\n{}\n".format(
                data_format, expected, result
            )
        )
        exit(1)


formats = (
    client.query(
        "SELECT name FROM system.formats WHERE is_input AND is_output \
        AND name NOT IN ('CapnProto', 'RawBLOB', 'Template', 'ProtobufSingle', 'LineAsString', 'Protobuf', 'ProtobufList') ORDER BY name"
    )
    .strip()
    .split("\n")
)

# Generic formats
client.query("DROP TABLE IF EXISTS t_async_insert")
client.query(
    "CREATE TABLE t_async_insert (id UInt64, s String, arr Array(UInt64)) ENGINE = Memory"
)
gen_data_query = "SELECT number AS id, toString(number) AS s, range(number) AS arr FROM numbers(10) FORMAT {}"

for data_format in formats:
    run_test(
        data_format,
        gen_data_query,
        settings={"async_insert": 1, "wait_for_async_insert": 1},
    )

# LineAsString
client.query("DROP TABLE IF EXISTS t_async_insert")
client.query("CREATE TABLE t_async_insert (s String) ENGINE = Memory")
gen_data_query = "SELECT toString(number) AS s FROM numbers(10) FORMAT {}"

run_test(
    "LineAsString",
    gen_data_query,
    settings={"async_insert": 1, "wait_for_async_insert": 1},
)

# TODO: add CapnProto and Protobuf
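What the loop above exercises, in isolation: with async_insert=1 the server buffers the INSERT and flushes it asynchronously, and wait_for_async_insert=1 makes the client block until that flush completes, so the subsequent SELECT sees the rows. A minimal stand-alone sketch with the same helper (table name is illustrative):

settings = {"async_insert": 1, "wait_for_async_insert": 1}
client.query("CREATE TABLE IF NOT EXISTS t_sketch (x UInt64) ENGINE = Memory")
client.query_with_data("INSERT INTO t_sketch FORMAT TSV", b"1\n2\n3\n", settings=settings)
print(client.query("SELECT count() FROM t_sketch"))  # 3 once the flush has happened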
@@ -1,6 +1,6 @@
#!/usr/bin/env python3

from http.server import SimpleHTTPRequestHandler, HTTPServer
import socket
import sys
import threading
@@ -17,6 +17,7 @@ def is_ipv6(host):
    except:
        return True


def get_local_port(host, ipv6):
    if ipv6:
        family = socket.AF_INET6
@@ -27,20 +28,19 @@ def get_local_port(host, ipv6):
    fd.bind((host, 0))
    return fd.getsockname()[1]


CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "localhost")
CLICKHOUSE_PORT_HTTP = os.environ.get("CLICKHOUSE_PORT_HTTP", "8123")

# Server returns this JSON response.
SERVER_JSON_RESPONSE = """{
"login": "ClickHouse",
"id": 54801242,
"name": "ClickHouse",
"company": null
}"""

EXPECTED_ANSWER = """{\\n\\t"login": "ClickHouse",\\n\\t"id": 54801242,\\n\\t"name": "ClickHouse",\\n\\t"company": null\\n}"""

#####################################################################################
# This test starts an HTTP server and serves data to clickhouse url-engine based table.
@@ -51,26 +51,38 @@ EXPECTED_ANSWER = \
#####################################################################################

# IP-address of this host accessible from the outside world. Get the first one
HTTP_SERVER_HOST = (
    subprocess.check_output(["hostname", "-i"]).decode("utf-8").strip().split()[0]
)
IS_IPV6 = is_ipv6(HTTP_SERVER_HOST)
HTTP_SERVER_PORT = get_local_port(HTTP_SERVER_HOST, IS_IPV6)

# IP address and port of the HTTP server started from this script.
HTTP_SERVER_ADDRESS = (HTTP_SERVER_HOST, HTTP_SERVER_PORT)
if IS_IPV6:
    HTTP_SERVER_URL_STR = (
        "http://"
        + f"[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}"
        + "/"
    )
else:
    HTTP_SERVER_URL_STR = (
        "http://" + f"{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}" + "/"
    )


def get_ch_answer(query):
    host = CLICKHOUSE_HOST
    if IS_IPV6:
        host = f"[{host}]"

    url = os.environ.get(
        "CLICKHOUSE_URL",
        "http://{host}:{port}".format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP),
    )
    return urllib.request.urlopen(url, data=query.encode()).read().decode()


def check_answers(query, answer):
    ch_answer = get_ch_answer(query)
    if ch_answer.strip() != answer.strip():
@@ -79,16 +91,17 @@ def check_answers(query, answer):
|
print("Fetched answer :", ch_answer, file=sys.stderr)
|
||||||
raise Exception("Fail on query")
|
raise Exception("Fail on query")
|
||||||
|
|
||||||
|
|
||||||
# Server with check for User-Agent headers.
|
# Server with check for User-Agent headers.
|
||||||
class HttpProcessor(SimpleHTTPRequestHandler):
|
class HttpProcessor(SimpleHTTPRequestHandler):
|
||||||
def _set_headers(self):
|
def _set_headers(self):
|
||||||
user_agent = self.headers.get('User-Agent')
|
user_agent = self.headers.get("User-Agent")
|
||||||
if user_agent and user_agent.startswith('ClickHouse/'):
|
if user_agent and user_agent.startswith("ClickHouse/"):
|
||||||
self.send_response(200)
|
self.send_response(200)
|
||||||
else:
|
else:
|
||||||
self.send_response(403)
|
self.send_response(403)
|
||||||
|
|
||||||
self.send_header('Content-Type', 'text/csv')
|
self.send_header("Content-Type", "text/csv")
|
||||||
self.end_headers()
|
self.end_headers()
|
||||||
|
|
||||||
def do_GET(self):
|
def do_GET(self):
|
||||||
@ -98,9 +111,11 @@ class HttpProcessor(SimpleHTTPRequestHandler):
|
|||||||
def log_message(self, format, *args):
|
def log_message(self, format, *args):
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
class HTTPServerV6(HTTPServer):
|
class HTTPServerV6(HTTPServer):
|
||||||
address_family = socket.AF_INET6
|
address_family = socket.AF_INET6
|
||||||
|
|
||||||
|
|
||||||
def start_server(requests_amount):
|
def start_server(requests_amount):
|
||||||
if IS_IPV6:
|
if IS_IPV6:
|
||||||
httpd = HTTPServerV6(HTTP_SERVER_ADDRESS, HttpProcessor)
|
httpd = HTTPServerV6(HTTP_SERVER_ADDRESS, HttpProcessor)
|
||||||
@ -114,15 +129,18 @@ def start_server(requests_amount):
|
|||||||
t = threading.Thread(target=real_func)
|
t = threading.Thread(target=real_func)
|
||||||
return t
|
return t
|
||||||
|
|
||||||
|
|
||||||
#####################################################################
|
#####################################################################
|
||||||
# Testing area.
|
# Testing area.
|
||||||
#####################################################################
|
#####################################################################
|
||||||
|
|
||||||
|
|
||||||
def test_select():
|
def test_select():
|
||||||
global HTTP_SERVER_URL_STR
|
global HTTP_SERVER_URL_STR
|
||||||
query = 'SELECT * FROM url(\'{}\',\'JSONAsString\');'.format(HTTP_SERVER_URL_STR)
|
query = "SELECT * FROM url('{}','JSONAsString');".format(HTTP_SERVER_URL_STR)
|
||||||
check_answers(query, EXPECTED_ANSWER)
|
check_answers(query, EXPECTED_ANSWER)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
# HEAD + GET
|
# HEAD + GET
|
||||||
t = start_server(3)
|
t = start_server(3)
|
||||||
@ -131,6 +149,7 @@ def main():
|
|||||||
t.join()
|
t.join()
|
||||||
print("PASSED")
|
print("PASSED")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
try:
|
try:
|
||||||
main()
|
main()
|
||||||
@ -141,4 +160,3 @@ if __name__ == "__main__":
|
|||||||
sys.stderr.flush()
|
sys.stderr.flush()
|
||||||
|
|
||||||
        os._exit(1)
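The 200/403 branch in HttpProcessor is the whole point of this file: the server only serves requests whose User-Agent starts with "ClickHouse/". Reproducing the check by hand (a sketch, not part of the commit; the real client sets this header itself):

import urllib.request

req = urllib.request.Request(HTTP_SERVER_URL_STR)
req.add_header("User-Agent", "ClickHouse/21.9")  # accepted: startswith("ClickHouse/")
print(urllib.request.urlopen(req).status)        # 200; without the header: HTTP 403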
@@ -122,7 +122,7 @@ class HttpProcessor(BaseHTTPRequestHandler):
    get_call_num = 0
    responses_to_get = []

    def send_head(self, from_get=False):
        if self.headers["Range"] and HttpProcessor.allow_range:
            try:
                self.range = parse_byte_range(self.headers["Range"])
@@ -146,7 +146,9 @@ class HttpProcessor(BaseHTTPRequestHandler):
                self.send_error(416, "Requested Range Not Satisfiable")
                return None

        retry_range_request = (
            first != 0 and from_get is True and len(HttpProcessor.responses_to_get) > 0
        )
        if retry_range_request:
            code = HttpProcessor.responses_to_get.pop()
            if code not in HttpProcessor.responses:
@@ -244,7 +246,9 @@ def run_test(allow_range, settings, check_retries=False):
raise Exception("HTTP Range was not used when supported")
|
raise Exception("HTTP Range was not used when supported")
|
||||||
|
|
||||||
if check_retries and len(HttpProcessor.responses_to_get) > 0:
|
if check_retries and len(HttpProcessor.responses_to_get) > 0:
|
||||||
raise Exception("Expected to get http response 500, which had to be retried, but 200 ok returned and then retried")
|
raise Exception(
|
||||||
|
"Expected to get http response 500, which had to be retried, but 200 ok returned and then retried"
|
||||||
|
)
|
||||||
|
|
||||||
if retries_num > 0:
|
if retries_num > 0:
|
||||||
expected_get_call_num += retries_num - 1
|
expected_get_call_num += retries_num - 1
|
||||||
@ -263,7 +267,7 @@ def run_test(allow_range, settings, check_retries=False):
|
|||||||
|
|
||||||
|
|
||||||
def main():
    settings = {"max_download_buffer_size": 20}

    # Test Accept-Ranges=False
    run_test(allow_range=False, settings=settings)
@@ -271,7 +275,7 @@ def main():
    run_test(allow_range=True, settings=settings)

    # Test Accept-Ranges=True, parallel download is used
    settings = {"max_download_buffer_size": 10}
    run_test(allow_range=True, settings=settings)

    # Test Accept-Ranges=True, parallel download is not used,
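Background for send_head above: the Range request header has the form "bytes=first-last", and when max_download_buffer_size forces parallel download, ClickHouse issues several such ranged GETs. A hedged sketch of the parsing that parse_byte_range (not shown in this hunk) presumably performs — its real signature and behavior may differ:

def parse_byte_range_sketch(header):
    # "bytes=0-9" -> (0, 9); an open-ended "bytes=5-" -> (5, None)
    units, _, rng = header.partition("=")
    assert units.strip() == "bytes"
    first, _, last = rng.partition("-")
    return int(first), int(last) if last else None

assert parse_byte_range_sketch("bytes=0-9") == (0, 9)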
@@ -7,7 +7,7 @@ import pandas as pd
import numpy as np

CURDIR = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(0, os.path.join(CURDIR, "helpers"))

from pure_http_client import ClickHouseClient

@@ -22,15 +22,22 @@ def test_and_check(rvs, n_groups, f_stat, p_value, precision=1e-2):
client.query("DROP TABLE IF EXISTS anova;")
|
client.query("DROP TABLE IF EXISTS anova;")
|
||||||
client.query("CREATE TABLE anova (left Float64, right UInt64) ENGINE = Memory;")
|
client.query("CREATE TABLE anova (left Float64, right UInt64) ENGINE = Memory;")
|
||||||
for group in range(n_groups):
|
for group in range(n_groups):
|
||||||
client.query(f'''INSERT INTO anova VALUES {", ".join([f'({i},{group})' for i in rvs[group]])};''')
|
client.query(
|
||||||
|
f"""INSERT INTO anova VALUES {", ".join([f'({i},{group})' for i in rvs[group]])};"""
|
||||||
|
)
|
||||||
|
|
||||||
real = client.query_return_df(
|
real = client.query_return_df(
|
||||||
'''SELECT roundBankers(a.1, 16) as f_stat, roundBankers(a.2, 16) as p_value FROM (SELECT anova(left, right) as a FROM anova) FORMAT TabSeparatedWithNames;''')
|
"""SELECT roundBankers(a.1, 16) as f_stat, roundBankers(a.2, 16) as p_value FROM (SELECT anova(left, right) as a FROM anova) FORMAT TabSeparatedWithNames;"""
|
||||||
|
)
|
||||||
|
|
||||||
real_f_stat = real['f_stat'][0]
|
real_f_stat = real["f_stat"][0]
|
||||||
real_p_value = real['p_value'][0]
|
real_p_value = real["p_value"][0]
|
||||||
assert(abs(real_f_stat - np.float64(f_stat)) < precision), f"clickhouse_f_stat {real_f_stat}, py_f_stat {f_stat}"
|
assert (
|
||||||
assert(abs(real_p_value - np.float64(p_value)) < precision), f"clickhouse_p_value {real_p_value}, py_p_value {p_value}"
|
abs(real_f_stat - np.float64(f_stat)) < precision
|
||||||
|
), f"clickhouse_f_stat {real_f_stat}, py_f_stat {f_stat}"
|
||||||
|
assert (
|
||||||
|
abs(real_p_value - np.float64(p_value)) < precision
|
||||||
|
), f"clickhouse_p_value {real_p_value}, py_p_value {p_value}"
|
||||||
client.query("DROP TABLE IF EXISTS anova;")
|
client.query("DROP TABLE IF EXISTS anova;")
|
||||||
|
|
||||||
|
|
||||||
|
@ -123,10 +123,14 @@ Uses FinishSortingTransform: {}
|
|||||||
|
|
||||||
for query in queries:
    check_query(query["where"], query["order_by"], query["optimize"], False)
    check_query(
        query["where"], query["order_by"] + ["e"], query["optimize"], query["optimize"]
    )

    where_columns = [f"bitNot({col})" for col in query["where"]]
    check_query(where_columns, query["order_by"], query["optimize"], False)
    check_query(
        where_columns, query["order_by"] + ["e"], query["optimize"], query["optimize"]
    )

print("OK")
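check_query above (its body is not shown in this hunk) asserts whether the read-in-order optimization fires, i.e. whether the pipeline uses FinishSortingTransform instead of a full sort. One way to observe the same thing by hand — a sketch assuming the pure_http_client.ClickHouseClient helper used elsewhere in this commit; table and column names are illustrative:

client = ClickHouseClient()
plan = client.query("EXPLAIN PIPELINE SELECT * FROM t ORDER BY a, b")
print("FinishSortingTransform" in plan)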
@@ -8,8 +8,8 @@ TRANSFER_ENCODING_HEADER = "Transfer-Encoding"

def main():
    host = os.environ["CLICKHOUSE_HOST"]
    port = int(os.environ["CLICKHOUSE_PORT_HTTP"])

    sock = socket(AF_INET, SOCK_STREAM)
    sock.connect((host, port))
@@ -47,4 +47,3 @@ def main():
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|
||||||
|
@ -5,9 +5,10 @@ import os
|
|||||||
import uuid
import json

CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "127.0.0.1")
CLICKHOUSE_PORT = int(os.environ.get("CLICKHOUSE_PORT_TCP", "900000"))
CLICKHOUSE_DATABASE = os.environ.get("CLICKHOUSE_DATABASE", "default")


def writeVarUInt(x, ba):
    for _ in range(0, 9):
@@ -24,12 +25,12 @@ def writeVarUInt(x, ba):

def writeStringBinary(s, ba):
    b = bytes(s, "utf-8")
    writeVarUInt(len(s), ba)
    ba.extend(b)


def readStrict(s, size=1):
    res = bytearray()
    while size:
        cur = s.recv(size)
@@ -48,18 +49,23 @@ def readUInt(s, size=1):
        val += res[i] << (i * 8)
    return val


def readUInt8(s):
    return readUInt(s)


def readUInt16(s):
    return readUInt(s, 2)


def readUInt32(s):
    return readUInt(s, 4)


def readUInt64(s):
    return readUInt(s, 8)


def readVarUInt(s):
    x = 0
    for i in range(9):
@@ -75,25 +81,25 @@ def readVarUInt(s):
def readStringBinary(s):
    size = readVarUInt(s)
    s = readStrict(s, size)
    return s.decode("utf-8")


def sendHello(s):
    ba = bytearray()
    writeVarUInt(0, ba)  # Hello
    writeStringBinary("simple native protocol", ba)
    writeVarUInt(21, ba)
    writeVarUInt(9, ba)
    writeVarUInt(54449, ba)
    writeStringBinary(CLICKHOUSE_DATABASE, ba)  # database
    writeStringBinary("default", ba)  # user
    writeStringBinary("", ba)  # pwd
    s.sendall(ba)


def receiveHello(s):
    p_type = readVarUInt(s)
    assert p_type == 0  # Hello
    server_name = readStringBinary(s)
    # print("Server name: ", server_name)
    server_version_major = readVarUInt(s)
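With the helpers above, a complete client handshake plus a trivial query is a few lines — a driver sketch using only names defined in this file (the timeout is an added assumption):

import socket

s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.settimeout(30)
s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT))
sendHello(s)
receiveHello(s)
sendQuery(s, "select 1")
sendEmptyBlock(s)  # an empty data block marks the end of external tables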
@@ -111,65 +117,65 @@ def receiveHello(s):

def serializeClientInfo(ba, query_id):
    writeStringBinary("default", ba)  # initial_user
    writeStringBinary(query_id, ba)  # initial_query_id
    writeStringBinary("127.0.0.1:9000", ba)  # initial_address
    ba.extend([0] * 8)  # initial_query_start_time_microseconds
    ba.append(1)  # TCP
    writeStringBinary("os_user", ba)  # os_user
    writeStringBinary("client_hostname", ba)  # client_hostname
    writeStringBinary("client_name", ba)  # client_name
    writeVarUInt(21, ba)
    writeVarUInt(9, ba)
    writeVarUInt(54449, ba)
    writeStringBinary("", ba)  # quota_key
    writeVarUInt(0, ba)  # distributed_depth
    writeVarUInt(1, ba)  # client_version_patch
    ba.append(0)  # No telemetry

def sendQuery(s, query):
|
def sendQuery(s, query):
|
||||||
ba = bytearray()
|
ba = bytearray()
|
||||||
query_id = uuid.uuid4().hex
|
query_id = uuid.uuid4().hex
|
||||||
writeVarUInt(1, ba) # query
|
writeVarUInt(1, ba) # query
|
||||||
writeStringBinary(query_id, ba)
|
writeStringBinary(query_id, ba)
|
||||||
|
|
||||||
ba.append(1) # INITIAL_QUERY
|
ba.append(1) # INITIAL_QUERY
|
||||||
|
|
||||||
# client info
|
# client info
|
||||||
serializeClientInfo(ba, query_id)
|
serializeClientInfo(ba, query_id)
|
||||||
|
|
||||||
writeStringBinary('', ba) # No settings
|
writeStringBinary("", ba) # No settings
|
||||||
writeStringBinary('', ba) # No interserver secret
|
writeStringBinary("", ba) # No interserver secret
|
||||||
writeVarUInt(2, ba) # Stage - Complete
|
writeVarUInt(2, ba) # Stage - Complete
|
||||||
ba.append(0) # No compression
|
ba.append(0) # No compression
|
||||||
writeStringBinary(query, ba) # query, finally
|
writeStringBinary(query, ba) # query, finally
|
||||||
s.sendall(ba)
|
s.sendall(ba)
|
||||||
|
|
||||||
|
|
||||||
def serializeBlockInfo(ba):
|
def serializeBlockInfo(ba):
|
||||||
writeVarUInt(1, ba) # 1
|
writeVarUInt(1, ba) # 1
|
||||||
ba.append(0) # is_overflows
|
ba.append(0) # is_overflows
|
||||||
writeVarUInt(2, ba) # 2
|
writeVarUInt(2, ba) # 2
|
||||||
writeVarUInt(0, ba) # 0
|
writeVarUInt(0, ba) # 0
|
||||||
ba.extend([0] * 4) # bucket_num
|
ba.extend([0] * 4) # bucket_num
|
||||||
|
|
||||||
|
|
||||||
def sendEmptyBlock(s):
|
def sendEmptyBlock(s):
|
||||||
ba = bytearray()
|
ba = bytearray()
|
||||||
writeVarUInt(2, ba) # Data
|
writeVarUInt(2, ba) # Data
|
||||||
writeStringBinary('', ba)
|
writeStringBinary("", ba)
|
||||||
serializeBlockInfo(ba)
|
serializeBlockInfo(ba)
|
||||||
writeVarUInt(0, ba) # rows
|
writeVarUInt(0, ba) # rows
|
||||||
writeVarUInt(0, ba) # columns
|
writeVarUInt(0, ba) # columns
|
||||||
s.sendall(ba)
|
s.sendall(ba)
|
||||||
|
|
||||||
|
|
||||||
def assertPacket(packet, expected):
|
def assertPacket(packet, expected):
|
||||||
assert(packet == expected), packet
|
assert packet == expected, packet
|
||||||
|
|
||||||
|
|
||||||
class Progress():
|
class Progress:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
# NOTE: this is done in ctor to initialize __dict__
|
# NOTE: this is done in ctor to initialize __dict__
|
||||||
self.read_rows = 0
|
self.read_rows = 0
|
||||||
@@ -198,11 +204,12 @@ class Progress():

     def __bool__(self):
         return (
-            self.read_rows > 0 or
-            self.read_bytes > 0 or
-            self.total_rows_to_read > 0 or
-            self.written_rows > 0 or
-            self.written_bytes > 0)
+            self.read_rows > 0
+            or self.read_bytes > 0
+            or self.total_rows_to_read > 0
+            or self.written_rows > 0
+            or self.written_bytes > 0
+        )


 def readProgress(s):
@@ -219,13 +226,14 @@ def readProgress(s):
     progress.readPacket(s)
     return progress


 def readException(s):
     code = readUInt32(s)
     name = readStringBinary(s)
     text = readStringBinary(s)
     readStringBinary(s) # trace
     assertPacket(readUInt8(s), 0) # has_nested
-    return "code {}: {}".format(code, text.replace('DB::Exception:', ''))
+    return "code {}: {}".format(code, text.replace("DB::Exception:", ""))

+
 def main():
@@ -236,7 +244,10 @@ def main():
         receiveHello(s)
         # For 1 second sleep and 1000ms of interactive_delay we definitely should have a non-zero progress packet.
         # NOTE: interactive_delay=0 cannot be used since in this case CompletedPipelineExecutor will not call cancelled callback.
-        sendQuery(s, "insert into function null('_ Int') select sleep(1) from numbers(2) settings max_block_size=1, interactive_delay=1000")
+        sendQuery(
+            s,
+            "insert into function null('_ Int') select sleep(1) from numbers(2) settings max_block_size=1, interactive_delay=1000",
+        )

         # external tables
         sendEmptyBlock(s)
@@ -4,18 +4,19 @@ import os
 import sys

 CURDIR = os.path.dirname(os.path.realpath(__file__))
-sys.path.insert(0, os.path.join(CURDIR, 'helpers'))
+sys.path.insert(0, os.path.join(CURDIR, "helpers"))

 from pure_http_client import ClickHouseClient

+
 class Tester:
-    '''
+    """
     - Creates test table
     - Deletes the specified range of rows
     - Masks another range using row-level policy
     - Runs some read queries and checks that the results
-    '''
+    """

     def __init__(self, session, url, index_granularity, total_rows):
         self.session = session
         self.url = url
@@ -25,10 +26,10 @@ class Tester:
         self.repro_queries = []

     def report_error(self):
-        print('Repro steps:', '\n\n\t'.join(self.repro_queries))
+        print("Repro steps:", "\n\n\t".join(self.repro_queries))
         exit(1)

-    def query(self, query_text, include_in_repro_steps = True, expected_data = None):
+    def query(self, query_text, include_in_repro_steps=True, expected_data=None):
         self.repro_queries.append(query_text)
         resp = self.session.post(self.url, data=query_text)
         if resp.status_code != 200:
@@ -36,113 +37,187 @@ class Tester:
             error = resp.text[0:40]
             if error not in self.reported_errors:
                 self.reported_errors.add(error)
-                print('Code:', resp.status_code)
-                print('Result:', resp.text)
+                print("Code:", resp.status_code)
+                print("Result:", resp.text)
             self.report_error()

         result = resp.text
         # Check that the result is as expected
-        if ((not expected_data is None) and (int(result) != len(expected_data))):
-            print('Expected {} rows, got {}'.format(len(expected_data), result))
-            print('Expected data:' + str(expected_data))
+        if (not expected_data is None) and (int(result) != len(expected_data)):
+            print("Expected {} rows, got {}".format(len(expected_data), result))
+            print("Expected data:" + str(expected_data))
             self.report_error()

         if not include_in_repro_steps:
             self.repro_queries.pop()

-    def check_data(self, all_data, delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end):
+    def check_data(
+        self,
+        all_data,
+        delete_range_start,
+        delete_range_end,
+        row_level_policy_range_start,
+        row_level_policy_range_end,
+    ):
         all_data_after_delete = all_data[
-            ~((all_data.a == 0) &
-              (all_data.b > delete_range_start) &
-              (all_data.b <= delete_range_end))]
+            ~(
+                (all_data.a == 0)
+                & (all_data.b > delete_range_start)
+                & (all_data.b <= delete_range_end)
+            )
+        ]
         all_data_after_row_policy = all_data_after_delete[
-            (all_data_after_delete.b <= row_level_policy_range_start) |
-            (all_data_after_delete.b > row_level_policy_range_end)]
+            (all_data_after_delete.b <= row_level_policy_range_start)
+            | (all_data_after_delete.b > row_level_policy_range_end)
+        ]

-        for to_select in ['count()', 'sum(d)']: # Test reading with and without column with default value
-            self.query('SELECT {} FROM tab_02473;'.format(to_select), False, all_data_after_row_policy)
+        for to_select in [
+            "count()",
+            "sum(d)",
+        ]: # Test reading with and without column with default value
+            self.query(
+                "SELECT {} FROM tab_02473;".format(to_select),
+                False,
+                all_data_after_row_policy,
+            )

         delta = 10
         for query_range_start in [0, delta]:
-            for query_range_end in [self.total_rows - delta]: #, self.total_rows]:
+            for query_range_end in [self.total_rows - delta]: # , self.total_rows]:
                 expected = all_data_after_row_policy[
-                    (all_data_after_row_policy.a == 0) &
-                    (all_data_after_row_policy.b > query_range_start) &
-                    (all_data_after_row_policy.b <= query_range_end)]
-                self.query('SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} WHERE a == 0;'.format(
-                    to_select, query_range_start, query_range_end), False, expected)
+                    (all_data_after_row_policy.a == 0)
+                    & (all_data_after_row_policy.b > query_range_start)
+                    & (all_data_after_row_policy.b <= query_range_end)
+                ]
+                self.query(
+                    "SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} WHERE a == 0;".format(
+                        to_select, query_range_start, query_range_end
+                    ),
+                    False,
+                    expected,
+                )

                 expected = all_data_after_row_policy[
-                    (all_data_after_row_policy.a == 0) &
-                    (all_data_after_row_policy.c > query_range_start) &
-                    (all_data_after_row_policy.c <= query_range_end)]
-                self.query('SELECT {} from tab_02473 PREWHERE c > {} AND c <= {} WHERE a == 0;'.format(
-                    to_select, query_range_start, query_range_end), False, expected)
+                    (all_data_after_row_policy.a == 0)
+                    & (all_data_after_row_policy.c > query_range_start)
+                    & (all_data_after_row_policy.c <= query_range_end)
+                ]
+                self.query(
+                    "SELECT {} from tab_02473 PREWHERE c > {} AND c <= {} WHERE a == 0;".format(
+                        to_select, query_range_start, query_range_end
+                    ),
+                    False,
+                    expected,
+                )

                 expected = all_data_after_row_policy[
-                    (all_data_after_row_policy.a == 0) &
-                    ((all_data_after_row_policy.c <= query_range_start) |
-                     (all_data_after_row_policy.c > query_range_end))]
-                self.query('SELECT {} from tab_02473 PREWHERE c <= {} OR c > {} WHERE a == 0;'.format(
-                    to_select, query_range_start, query_range_end), False, expected)
+                    (all_data_after_row_policy.a == 0)
+                    & (
+                        (all_data_after_row_policy.c <= query_range_start)
+                        | (all_data_after_row_policy.c > query_range_end)
+                    )
+                ]
+                self.query(
+                    "SELECT {} from tab_02473 PREWHERE c <= {} OR c > {} WHERE a == 0;".format(
+                        to_select, query_range_start, query_range_end
+                    ),
+                    False,
+                    expected,
+                )

-    def run_test(self, delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end):
+    def run_test(
+        self,
+        delete_range_start,
+        delete_range_end,
+        row_level_policy_range_start,
+        row_level_policy_range_end,
+    ):
         self.repro_queries = []

-        self.query('''
+        self.query(
+            """
             CREATE TABLE tab_02473 (a Int8, b Int32, c Int32, PRIMARY KEY (a))
             ENGINE = MergeTree() ORDER BY (a, b)
-            SETTINGS min_bytes_for_wide_part = 0, index_granularity = {};'''.format(self.index_granularity))
+            SETTINGS min_bytes_for_wide_part = 0, index_granularity = {};""".format(
+                self.index_granularity
+            )
+        )

-        self.query('INSERT INTO tab_02473 select 0, number+1, number+1 FROM numbers({});'.format(self.total_rows))
+        self.query(
+            "INSERT INTO tab_02473 select 0, number+1, number+1 FROM numbers({});".format(
+                self.total_rows
+            )
+        )

         client = ClickHouseClient()
-        all_data = client.query_return_df("SELECT a, b, c, 1 as d FROM tab_02473 FORMAT TabSeparatedWithNames;")
+        all_data = client.query_return_df(
+            "SELECT a, b, c, 1 as d FROM tab_02473 FORMAT TabSeparatedWithNames;"
+        )

-        self.query('OPTIMIZE TABLE tab_02473 FINAL SETTINGS mutations_sync=2;')
+        self.query("OPTIMIZE TABLE tab_02473 FINAL SETTINGS mutations_sync=2;")

         # After all data has been written add a column with default value
-        self.query('ALTER TABLE tab_02473 ADD COLUMN d Int64 DEFAULT 1;')
+        self.query("ALTER TABLE tab_02473 ADD COLUMN d Int64 DEFAULT 1;")

         self.check_data(all_data, -100, -100, -100, -100)

-        self.query('DELETE FROM tab_02473 WHERE a = 0 AND b > {} AND b <= {};'.format(
-            delete_range_start, delete_range_end))
+        self.query(
+            "DELETE FROM tab_02473 WHERE a = 0 AND b > {} AND b <= {};".format(
+                delete_range_start, delete_range_end
+            )
+        )

         self.check_data(all_data, delete_range_start, delete_range_end, -100, -100)

-        self.query('CREATE ROW POLICY policy_tab_02473 ON tab_02473 FOR SELECT USING b <= {} OR b > {} TO default;'.format(
-            row_level_policy_range_start, row_level_policy_range_end))
+        self.query(
+            "CREATE ROW POLICY policy_tab_02473 ON tab_02473 FOR SELECT USING b <= {} OR b > {} TO default;".format(
+                row_level_policy_range_start, row_level_policy_range_end
+            )
+        )

-        self.check_data(all_data, delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end)
+        self.check_data(
+            all_data,
+            delete_range_start,
+            delete_range_end,
+            row_level_policy_range_start,
+            row_level_policy_range_end,
+        )

-        self.query('DROP POLICY policy_tab_02473 ON tab_02473;')
+        self.query("DROP POLICY policy_tab_02473 ON tab_02473;")

-        self.query('DROP TABLE tab_02473;')
+        self.query("DROP TABLE tab_02473;")


 def main():
     # Set mutations to synchronous mode and enable lightweight DELETE's
-    url = os.environ['CLICKHOUSE_URL'] + '&max_threads=1'
+    url = os.environ["CLICKHOUSE_URL"] + "&max_threads=1"

-    default_index_granularity = 10;
+    default_index_granularity = 10
     total_rows = 8 * default_index_granularity
     step = default_index_granularity
     session = requests.Session()
-    for index_granularity in [default_index_granularity-1, default_index_granularity]: # [default_index_granularity-1, default_index_granularity+1, default_index_granularity]:
+    for index_granularity in [
+        default_index_granularity - 1,
+        default_index_granularity,
+    ]: # [default_index_granularity-1, default_index_granularity+1, default_index_granularity]:
         tester = Tester(session, url, index_granularity, total_rows)
         # Test combinations of ranges of various size masked by lightweight DELETES
         # along with ranges of various size masked by row-level policies
         for delete_range_start in range(0, total_rows, 3 * step):
-            for delete_range_end in range(delete_range_start + 3 * step, total_rows, 2 * step):
+            for delete_range_end in range(
+                delete_range_start + 3 * step, total_rows, 2 * step
+            ):
                 for row_level_policy_range_start in range(0, total_rows, 3 * step):
-                    for row_level_policy_range_end in range(row_level_policy_range_start + 3 * step, total_rows, 2 * step):
-                        tester.run_test(delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end)
+                    for row_level_policy_range_end in range(
+                        row_level_policy_range_start + 3 * step, total_rows, 2 * step
+                    ):
+                        tester.run_test(
+                            delete_range_start,
+                            delete_range_end,
+                            row_level_policy_range_start,
+                            row_level_policy_range_end,
+                        )


 if __name__ == "__main__":
     main()

@@ -4,16 +4,17 @@ import os
 import sys

 CURDIR = os.path.dirname(os.path.realpath(__file__))
-sys.path.insert(0, os.path.join(CURDIR, 'helpers'))
+sys.path.insert(0, os.path.join(CURDIR, "helpers"))

 from pure_http_client import ClickHouseClient

+
 class Tester:
-    '''
+    """
     - Creates test table with multiple integer columns
     - Runs read queries with multiple range conditions on different columns in PREWHERE and checks that the result is correct
-    '''
+    """

     def __init__(self, session, url, index_granularity, total_rows):
         self.session = session
         self.url = url
@@ -23,10 +24,10 @@ class Tester:
         self.repro_queries = []

     def report_error(self):
-        print('Repro steps:', '\n\n\t'.join(self.repro_queries))
+        print("Repro steps:", "\n\n\t".join(self.repro_queries))
         exit(1)

-    def query(self, query_text, include_in_repro_steps = True, expected_data = None):
+    def query(self, query_text, include_in_repro_steps=True, expected_data=None):
         self.repro_queries.append(query_text)
         resp = self.session.post(self.url, data=query_text)
         if resp.status_code != 200:
@@ -34,98 +35,150 @@ class Tester:
             error = resp.text[0:40]
             if error not in self.reported_errors:
                 self.reported_errors.add(error)
-                print('Code:', resp.status_code)
-                print('Result:', resp.text)
+                print("Code:", resp.status_code)
+                print("Result:", resp.text)
             self.report_error()

         result = resp.text
         # Check that the result is as expected
-        if ((not expected_data is None) and (int(result) != len(expected_data))):
-            print('Expected {} rows, got {}'.format(len(expected_data), result))
-            print('Expected data:' + str(expected_data))
+        if (not expected_data is None) and (int(result) != len(expected_data)):
+            print("Expected {} rows, got {}".format(len(expected_data), result))
+            print("Expected data:" + str(expected_data))
             self.report_error()

         if not include_in_repro_steps:
             self.repro_queries.pop()

-    def check_data(self, all_data, c_range_start, c_range_end, d_range_start, d_range_end):
-        for to_select in ['count()', 'sum(e)']: # Test reading with and without column with default value
-            self.query('SELECT {} FROM tab_02473;'.format(to_select), False, all_data)
+    def check_data(
+        self, all_data, c_range_start, c_range_end, d_range_start, d_range_end
+    ):
+        for to_select in [
+            "count()",
+            "sum(e)",
+        ]: # Test reading with and without column with default value
+            self.query("SELECT {} FROM tab_02473;".format(to_select), False, all_data)

         delta = 10
         for b_range_start in [0, delta]:
-            for b_range_end in [self.total_rows - delta]: #, self.total_rows]:
+            for b_range_end in [self.total_rows - delta]: # , self.total_rows]:
                 expected = all_data[
-                    (all_data.a == 0) &
-                    (all_data.b > b_range_start) &
-                    (all_data.b <= b_range_end)]
-                self.query('SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} WHERE a == 0;'.format(
-                    to_select, b_range_start, b_range_end), False, expected)
+                    (all_data.a == 0)
+                    & (all_data.b > b_range_start)
+                    & (all_data.b <= b_range_end)
+                ]
+                self.query(
+                    "SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} WHERE a == 0;".format(
+                        to_select, b_range_start, b_range_end
+                    ),
+                    False,
+                    expected,
+                )

                 expected = all_data[
-                    (all_data.a == 0) &
-                    (all_data.b > b_range_start) &
-                    (all_data.b <= b_range_end) &
-                    (all_data.c > c_range_start) &
-                    (all_data.c <= c_range_end)]
-                self.query('SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} AND c > {} AND c <= {} WHERE a == 0;'.format(
-                    to_select, b_range_start, b_range_end, c_range_start, c_range_end), False, expected)
+                    (all_data.a == 0)
+                    & (all_data.b > b_range_start)
+                    & (all_data.b <= b_range_end)
+                    & (all_data.c > c_range_start)
+                    & (all_data.c <= c_range_end)
+                ]
+                self.query(
+                    "SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} AND c > {} AND c <= {} WHERE a == 0;".format(
+                        to_select,
+                        b_range_start,
+                        b_range_end,
+                        c_range_start,
+                        c_range_end,
+                    ),
+                    False,
+                    expected,
+                )

                 expected = all_data[
-                    (all_data.a == 0) &
-                    (all_data.b > b_range_start) &
-                    (all_data.b <= b_range_end) &
-                    (all_data.c > c_range_start) &
-                    (all_data.c <= c_range_end) &
-                    (all_data.d > d_range_start) &
-                    (all_data.d <= d_range_end)]
-                self.query('SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} AND c > {} AND c <= {} AND d > {} AND d <= {} WHERE a == 0;'.format(
-                    to_select, b_range_start, b_range_end, c_range_start, c_range_end, d_range_start, d_range_end), False, expected)
+                    (all_data.a == 0)
+                    & (all_data.b > b_range_start)
+                    & (all_data.b <= b_range_end)
+                    & (all_data.c > c_range_start)
+                    & (all_data.c <= c_range_end)
+                    & (all_data.d > d_range_start)
+                    & (all_data.d <= d_range_end)
+                ]
+                self.query(
+                    "SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} AND c > {} AND c <= {} AND d > {} AND d <= {} WHERE a == 0;".format(
+                        to_select,
+                        b_range_start,
+                        b_range_end,
+                        c_range_start,
+                        c_range_end,
+                        d_range_start,
+                        d_range_end,
+                    ),
+                    False,
+                    expected,
+                )

     def run_test(self, c_range_start, c_range_end, d_range_start, d_range_end):
         self.repro_queries = []

-        self.query('''
+        self.query(
+            """
             CREATE TABLE tab_02473 (a Int8, b Int32, c Int32, d Int32, PRIMARY KEY (a))
             ENGINE = MergeTree() ORDER BY (a, b)
-            SETTINGS min_bytes_for_wide_part = 0, index_granularity = {};'''.format(self.index_granularity))
+            SETTINGS min_bytes_for_wide_part = 0, index_granularity = {};""".format(
+                self.index_granularity
+            )
+        )

-        self.query('INSERT INTO tab_02473 select 0, number+1, number+1, number+1 FROM numbers({});'.format(self.total_rows))
+        self.query(
+            "INSERT INTO tab_02473 select 0, number+1, number+1, number+1 FROM numbers({});".format(
+                self.total_rows
+            )
+        )

         client = ClickHouseClient()
-        all_data = client.query_return_df("SELECT a, b, c, d, 1 as e FROM tab_02473 FORMAT TabSeparatedWithNames;")
+        all_data = client.query_return_df(
+            "SELECT a, b, c, d, 1 as e FROM tab_02473 FORMAT TabSeparatedWithNames;"
+        )

-        self.query('OPTIMIZE TABLE tab_02473 FINAL SETTINGS mutations_sync=2;')
+        self.query("OPTIMIZE TABLE tab_02473 FINAL SETTINGS mutations_sync=2;")

         # After all data has been written add a column with default value
-        self.query('ALTER TABLE tab_02473 ADD COLUMN e Int64 DEFAULT 1;')
+        self.query("ALTER TABLE tab_02473 ADD COLUMN e Int64 DEFAULT 1;")

-        self.check_data(all_data, c_range_start, c_range_end, d_range_start, d_range_end)
+        self.check_data(
+            all_data, c_range_start, c_range_end, d_range_start, d_range_end
+        )

-        self.query('DROP TABLE tab_02473;')
+        self.query("DROP TABLE tab_02473;")


 def main():
     # Enable multiple prewhere read steps
-    url = os.environ['CLICKHOUSE_URL'] + '&enable_multiple_prewhere_read_steps=1&move_all_conditions_to_prewhere=0&max_threads=1'
+    url = (
+        os.environ["CLICKHOUSE_URL"]
+        + "&enable_multiple_prewhere_read_steps=1&move_all_conditions_to_prewhere=0&max_threads=1"
+    )

-    default_index_granularity = 10;
+    default_index_granularity = 10
     total_rows = 8 * default_index_granularity
     step = default_index_granularity
     session = requests.Session()
-    for index_granularity in [default_index_granularity-1, default_index_granularity]:
+    for index_granularity in [default_index_granularity - 1, default_index_granularity]:
         tester = Tester(session, url, index_granularity, total_rows)
         # Test combinations of ranges of columns c and d
         for c_range_start in range(0, total_rows, int(2.3 * step)):
-            for c_range_end in range(c_range_start + 3 * step, total_rows, int(2.1 * step)):
-                for d_range_start in range(int(0.5 * step), total_rows, int(2.7 * step)):
-                    for d_range_end in range(d_range_start + 3 * step, total_rows, int(2.2 * step)):
-                        tester.run_test(c_range_start, c_range_end, d_range_start, d_range_end)
+            for c_range_end in range(
+                c_range_start + 3 * step, total_rows, int(2.1 * step)
+            ):
+                for d_range_start in range(
+                    int(0.5 * step), total_rows, int(2.7 * step)
+                ):
+                    for d_range_end in range(
+                        d_range_start + 3 * step, total_rows, int(2.2 * step)
+                    ):
+                        tester.run_test(
+                            c_range_start, c_range_end, d_range_start, d_range_end
+                        )


 if __name__ == "__main__":
     main()

@@ -8,7 +8,7 @@ import time
 from threading import Thread

 CURDIR = os.path.dirname(os.path.realpath(__file__))
-sys.path.insert(0, os.path.join(CURDIR, 'helpers'))
+sys.path.insert(0, os.path.join(CURDIR, "helpers"))

 from pure_http_client import ClickHouseClient

@@ -16,14 +16,23 @@ client = ClickHouseClient()

 # test table without partition
 client.query("DROP TABLE IF EXISTS t_async_insert_dedup_no_part NO DELAY")
-client.query('''
+client.query(
+    """
 CREATE TABLE t_async_insert_dedup_no_part (
     KeyID UInt32
 ) Engine = ReplicatedMergeTree('/clickhouse/tables/{shard}/{database}/t_async_insert_dedup', '{replica}')
 ORDER BY (KeyID)
-''')
+"""
+)

-client.query("insert into t_async_insert_dedup_no_part values (1), (2), (3), (4), (5)", settings = {"async_insert": 1, "wait_for_async_insert": 1, "insert_keeper_fault_injection_probability": 0})
+client.query(
+    "insert into t_async_insert_dedup_no_part values (1), (2), (3), (4), (5)",
+    settings={
+        "async_insert": 1,
+        "wait_for_async_insert": 1,
+        "insert_keeper_fault_injection_probability": 0,
+    },
+)
 result = client.query("select count(*) from t_async_insert_dedup_no_part")
 print(result, flush=True)
 client.query("DROP TABLE IF EXISTS t_async_insert_dedup_no_part NO DELAY")
@@ -32,13 +41,13 @@ client.query("DROP TABLE IF EXISTS t_async_insert_dedup_no_part NO DELAY")
 def generate_data(q, total_number):
     old_data = []
     max_chunk_size = 30
-    partitions = ['2022-11-11 10:10:10', '2022-12-12 10:10:10']
+    partitions = ["2022-11-11 10:10:10", "2022-12-12 10:10:10"]
     last_number = 0
     while True:
-        dup_simulate = random.randint(0,3)
+        dup_simulate = random.randint(0, 3)
         # insert old data randomly. 25% of them are dup.
         if dup_simulate == 0:
-            last_idx = len(old_data)-1
+            last_idx = len(old_data) - 1
             if last_idx < 0:
                 continue
             idx = last_idx - random.randint(0, 50)
@@ -53,7 +62,7 @@ def generate_data(q, total_number):
             end = start + chunk_size
             if end > total_number:
                 end = total_number
-            for i in range(start, end+1):
+            for i in range(start, end + 1):
                 partition = partitions[random.randint(0, 1)]
                 insert_stmt += "('{}', {}),".format(partition, i)
             insert_stmt = insert_stmt[:-1]
@@ -65,33 +74,46 @@ def generate_data(q, total_number):
     # wait until all the tasks are done.
     q.join()


 def fetch_and_insert_data(q, client):
     while True:
         insert = q.get()
-        client.query(insert, settings = {"async_insert": 1, "async_insert_deduplicate": 1, "wait_for_async_insert": 0, "async_insert_busy_timeout_ms": 1500, "insert_keeper_fault_injection_probability": 0})
+        client.query(
+            insert,
+            settings={
+                "async_insert": 1,
+                "async_insert_deduplicate": 1,
+                "wait_for_async_insert": 0,
+                "async_insert_busy_timeout_ms": 1500,
+                "insert_keeper_fault_injection_probability": 0,
+            },
+        )
         q.task_done()
         sleep_time = random.randint(50, 500)
-        time.sleep(sleep_time/1000.0)
+        time.sleep(sleep_time / 1000.0)


 # main process
 client.query("DROP TABLE IF EXISTS t_async_insert_dedup NO DELAY")
-client.query('''
+client.query(
+    """
 CREATE TABLE t_async_insert_dedup (
     EventDate DateTime,
     KeyID UInt32
 ) Engine = ReplicatedMergeTree('/clickhouse/tables/{shard}/{database}/t_async_insert_dedup', '{replica}')
 PARTITION BY toYYYYMM(EventDate)
 ORDER BY (KeyID, EventDate) SETTINGS use_async_block_ids_cache = 1
-''')
+"""
+)

 q = queue.Queue(100)
 total_number = 10000

-gen = Thread(target = generate_data, args = [q, total_number])
+gen = Thread(target=generate_data, args=[q, total_number])
 gen.start()

 for i in range(3):
-    insert = Thread(target = fetch_and_insert_data, args = [q, client])
+    insert = Thread(target=fetch_and_insert_data, args=[q, client])
     insert.start()

 gen.join()
@@ -109,7 +131,7 @@ while True:
         errMsg = f"the size of result is {len(result)}. we expect {total_number}."
     else:
         for i in range(total_number):
-            expect = str(i+1)
+            expect = str(i + 1)
             real = result[i]
             if expect != real:
                 err = True
@@ -117,7 +139,7 @@ while True:
                 break
     # retry several times to get stable results.
     if err and retry >= 5:
-        print (errMsg, flush=True)
+        print(errMsg, flush=True)
     elif err:
         retry += 1
         continue
@@ -125,11 +147,15 @@ while True:
     print(len(result), flush=True)
     break

-result = client.query("SELECT value FROM system.metrics where metric = 'AsyncInsertCacheSize'")
+result = client.query(
+    "SELECT value FROM system.metrics where metric = 'AsyncInsertCacheSize'"
+)
 result = int(result.split()[0])
 if result <= 0:
     raise Exception(f"AsyncInsertCacheSize should > 0, but got {result}")
-result = client.query("SELECT value FROM system.events where event = 'AsyncInsertCacheHits'")
+result = client.query(
+    "SELECT value FROM system.events where event = 'AsyncInsertCacheHits'"
+)
 result = int(result.split()[0])
 if result <= 0:
     raise Exception(f"AsyncInsertCacheHits should > 0, but got {result}")
@@ -19,9 +19,9 @@ import tenacity
 import xmltodict
 import yaml

-SELECT_VERSION = r'SELECT version()'
+SELECT_VERSION = r"SELECT version()"

-SELECT_UPTIME = r'''
+SELECT_UPTIME = r"""
 {% if version_ge('21.3') -%}
 SELECT formatReadableTimeDelta(uptime())
 {% else -%}
@@ -29,18 +29,18 @@ SELECT
     toString(floor(uptime() / 3600 / 24)) || ' days ' ||
     toString(floor(uptime() % (24 * 3600) / 3600, 1)) || ' hours'
 {% endif -%}
-'''
+"""

 SELECT_SYSTEM_TABLES = "SELECT name FROM system.tables WHERE database = 'system'"

-SELECT_DATABASE_ENGINES = r'''SELECT
+SELECT_DATABASE_ENGINES = r"""SELECT
     engine,
     count() "count"
 FROM system.databases
 GROUP BY engine
-'''
+"""

-SELECT_DATABASES = r'''SELECT
+SELECT_DATABASES = r"""SELECT
     name,
     engine,
     tables,
@@ -62,17 +62,17 @@ LEFT JOIN
 ) AS db_stats ON db.name = db_stats.database
 ORDER BY bytes_on_disk DESC
 LIMIT 10
-'''
+"""

-SELECT_TABLE_ENGINES = r'''SELECT
+SELECT_TABLE_ENGINES = r"""SELECT
     engine,
     count() "count"
 FROM system.tables
 WHERE database != 'system'
 GROUP BY engine
-'''
+"""

-SELECT_DICTIONARIES = r'''SELECT
+SELECT_DICTIONARIES = r"""SELECT
     source,
     type,
     status,
@@ -80,13 +80,13 @@ SELECT_DICTIONARIES = r'''SELECT
 FROM system.dictionaries
 GROUP BY source, type, status
 ORDER BY status DESC, source
-'''
+"""

 SELECT_ACCESS = "SHOW ACCESS"

 SELECT_QUOTA_USAGE = "SHOW QUOTA"

-SELECT_REPLICAS = r'''SELECT
+SELECT_REPLICAS = r"""SELECT
     database,
     table,
     is_leader,
@@ -98,9 +98,9 @@ SELECT_REPLICAS = r'''SELECT
 FROM system.replicas
 ORDER BY absolute_delay DESC
 LIMIT 10
-'''
+"""

-SELECT_REPLICATION_QUEUE = r'''SELECT
+SELECT_REPLICATION_QUEUE = r"""SELECT
     database,
     table,
     replica_name,
@@ -121,9 +121,9 @@ SELECT_REPLICATION_QUEUE = r'''SELECT
 FROM system.replication_queue
 ORDER BY create_time ASC
 LIMIT 20
-'''
+"""

-SELECT_REPLICATED_FETCHES = r'''SELECT
+SELECT_REPLICATED_FETCHES = r"""SELECT
     database,
     table,
     round(elapsed, 1) "elapsed",
@@ -140,9 +140,9 @@ SELECT_REPLICATED_FETCHES = r'''SELECT
     to_detached,
     thread_id
 FROM system.replicated_fetches
-'''
+"""

-SELECT_PARTS_PER_TABLE = r'''SELECT
+SELECT_PARTS_PER_TABLE = r"""SELECT
     database,
     table,
     count() "partitions",
@@ -162,9 +162,9 @@ FROM
 GROUP BY database, table
 ORDER BY max_parts_per_partition DESC
 LIMIT 10
-'''
+"""

-SELECT_MERGES = r'''SELECT
+SELECT_MERGES = r"""SELECT
     database,
     table,
     round(elapsed, 1) "elapsed",
@@ -187,9 +187,9 @@ SELECT_MERGES = r'''SELECT
     formatReadableSize(memory_usage) "memory_usage"
 {% endif -%}
 FROM system.merges
-'''
+"""

-SELECT_MUTATIONS = r'''SELECT
+SELECT_MUTATIONS = r"""SELECT
     database,
     table,
     mutation_id,
@@ -206,9 +206,9 @@ SELECT_MUTATIONS = r'''SELECT
 FROM system.mutations
 WHERE NOT is_done
 ORDER BY create_time DESC
-'''
+"""

-SELECT_RECENT_DATA_PARTS = r'''SELECT
+SELECT_RECENT_DATA_PARTS = r"""SELECT
     database,
     table,
     engine,
@@ -242,9 +242,9 @@ SELECT_RECENT_DATA_PARTS = r'''SELECT
 FROM system.parts
 WHERE modification_time > now() - INTERVAL 3 MINUTE
 ORDER BY modification_time DESC
-'''
+"""

-SELECT_DETACHED_DATA_PARTS = r'''SELECT
+SELECT_DETACHED_DATA_PARTS = r"""SELECT
     database,
     table,
     partition_id,
@@ -255,9 +255,9 @@ SELECT_DETACHED_DATA_PARTS = r'''SELECT
     max_block_number,
     level
 FROM system.detached_parts
-'''
+"""

-SELECT_PROCESSES = r'''SELECT
+SELECT_PROCESSES = r"""SELECT
     elapsed,
     query_id,
 {% if normalize_queries -%}
@@ -285,9 +285,9 @@ SELECT_PROCESSES = r'''SELECT
 {% endif -%}
 FROM system.processes
 ORDER BY elapsed DESC
-'''
+"""

-SELECT_TOP_QUERIES_BY_DURATION = r'''SELECT
+SELECT_TOP_QUERIES_BY_DURATION = r"""SELECT
     type,
     query_start_time,
     query_duration_ms,
@@ -339,9 +339,9 @@ WHERE type != 'QueryStart'
   AND event_time >= now() - INTERVAL 1 DAY
 ORDER BY query_duration_ms DESC
 LIMIT 10
-'''
+"""

-SELECT_TOP_QUERIES_BY_MEMORY_USAGE = r'''SELECT
+SELECT_TOP_QUERIES_BY_MEMORY_USAGE = r"""SELECT
     type,
     query_start_time,
     query_duration_ms,
@@ -393,9 +393,9 @@ WHERE type != 'QueryStart'
   AND event_time >= now() - INTERVAL 1 DAY
 ORDER BY memory_usage DESC
 LIMIT 10
-'''
+"""

-SELECT_FAILED_QUERIES = r'''SELECT
+SELECT_FAILED_QUERIES = r"""SELECT
     type,
     query_start_time,
     query_duration_ms,
@@ -448,9 +448,9 @@ WHERE type != 'QueryStart'
   AND exception != ''
 ORDER BY query_start_time DESC
 LIMIT 10
-'''
+"""

-SELECT_STACK_TRACES = r'''SELECT
+SELECT_STACK_TRACES = r"""SELECT
     '\n' || arrayStringConcat(
        arrayMap(
            x,
@@ -459,9 +459,9 @@ SELECT_STACK_TRACES = r'''SELECT
        arrayMap(x -> demangle(addressToSymbol(x)), trace)),
     '\n') AS trace
 FROM system.stack_trace
-'''
+"""

-SELECT_CRASH_LOG = r'''SELECT
+SELECT_CRASH_LOG = r"""SELECT
     event_time,
     signal,
     thread_id,
@@ -470,7 +470,7 @@ SELECT_CRASH_LOG = r'''SELECT
     version
 FROM system.crash_log
 ORDER BY event_time DESC
-'''
+"""


 def retry(exception_types, max_attempts=5, max_interval=5):
@@ -481,7 +481,8 @@ def retry(exception_types, max_attempts=5, max_interval=5):
         retry=tenacity.retry_if_exception_type(exception_types),
         wait=tenacity.wait_random_exponential(multiplier=0.5, max=max_interval),
         stop=tenacity.stop_after_attempt(max_attempts),
-        reraise=True)
+        reraise=True,
+    )


 class ClickhouseError(Exception):
@@ -502,9 +503,9 @@ class ClickhouseClient:
     def __init__(self, *, host="localhost", port=8123, user="default", password):
         self._session = requests.Session()
         if user:
-            self._session.headers['X-ClickHouse-User'] = user
-            self._session.headers['X-ClickHouse-Key'] = password
-        self._url = f'http://{host}:{port}'
+            self._session.headers["X-ClickHouse-User"] = user
+            self._session.headers["X-ClickHouse-Key"] = password
+        self._url = f"http://{host}:{port}"
         self._timeout = 60
         self._ch_version = None
@@ -516,7 +517,16 @@ class ClickhouseClient:
         return self._ch_version

     @retry(requests.exceptions.ConnectionError)
-    def query(self, query, query_args=None, format=None, post_data=None, timeout=None, echo=False, dry_run=False):
+    def query(
+        self,
+        query,
+        query_args=None,
+        format=None,
+        post_data=None,
+        timeout=None,
+        echo=False,
+        dry_run=False,
+    ):
         """
         Execute query.
         """
@@ -524,28 +534,30 @@ class ClickhouseClient:
             query = self.render_query(query, **query_args)

         if format:
-            query += f' FORMAT {format}'
+            query += f" FORMAT {format}"

         if timeout is None:
             timeout = self._timeout

         if echo:
-            print(sqlparse.format(query, reindent=True), '\n')
+            print(sqlparse.format(query, reindent=True), "\n")

         if dry_run:
             return None

         try:
-            response = self._session.post(self._url,
-                                          params={
-                                              'query': query,
-                                          },
-                                          json=post_data,
-                                          timeout=timeout)
+            response = self._session.post(
+                self._url,
+                params={
+                    "query": query,
+                },
+                json=post_data,
+                timeout=timeout,
+            )

             response.raise_for_status()

-            if format in ('JSON', 'JSONCompact'):
+            if format in ("JSON", "JSONCompact"):
                 return response.json()

             return response.text.strip()
@@ -555,7 +567,9 @@ class ClickhouseClient:
     def render_query(self, query, **kwargs):
         env = jinja2.Environment()

-        env.globals['version_ge'] = lambda version: version_ge(self.clickhouse_version, version)
+        env.globals["version_ge"] = lambda version: version_ge(
+            self.clickhouse_version, version
+        )

         template = env.from_string(query)
         return template.render(kwargs)
@@ -578,11 +592,13 @@ class ClickhouseConfig:

     @classmethod
     def load(cls):
-        return ClickhouseConfig(cls._load_config('/var/lib/clickhouse/preprocessed_configs/config.xml'))
+        return ClickhouseConfig(
+            cls._load_config("/var/lib/clickhouse/preprocessed_configs/config.xml")
+        )

     @staticmethod
     def _load_config(config_path):
-        with open(config_path, 'r') as file:
+        with open(config_path, "r") as file:
             return xmltodict.parse(file.read())

     @classmethod
@@ -591,8 +607,8 @@ class ClickhouseConfig:
         for key, value in list(config.items()):
             if isinstance(value, MutableMapping):
                 cls._mask_secrets(config[key])
-            elif key in ('password', 'secret_access_key', 'header', 'identity'):
-                config[key] = '*****'
+            elif key in ("password", "secret_access_key", "header", "identity"):
+                config[key] = "*****"


 class DiagnosticsData:
@@ -603,53 +619,53 @@ class DiagnosticsData:
     def __init__(self, args):
         self.args = args
         self.host = args.host
-        self._sections = [{'section': None, 'data': {}}]
+        self._sections = [{"section": None, "data": {}}]

     def add_string(self, name, value, section=None):
         self._section(section)[name] = {
-            'type': 'string',
-            'value': value,
+            "type": "string",
+            "value": value,
         }

     def add_xml_document(self, name, document, section=None):
         self._section(section)[name] = {
-            'type': 'xml',
-            'value': document,
+            "type": "xml",
+            "value": document,
         }

     def add_query(self, name, query, result, section=None):
         self._section(section)[name] = {
-            'type': 'query',
-            'query': query,
-            'result': result,
+            "type": "query",
+            "query": query,
+            "result": result,
         }

     def add_command(self, name, command, result, section=None):
         self._section(section)[name] = {
-            'type': 'command',
-            'command': command,
-            'result': result,
+            "type": "command",
+            "command": command,
+            "result": result,
         }

     def dump(self, format):
-        if format.startswith('json'):
+        if format.startswith("json"):
             result = self._dump_json()
-        elif format.startswith('yaml'):
+        elif format.startswith("yaml"):
             result = self._dump_yaml()
         else:
             result = self._dump_wiki()

-        if format.endswith('.gz'):
-            compressor = gzip.GzipFile(mode='wb', fileobj=sys.stdout.buffer)
+        if format.endswith(".gz"):
+            compressor = gzip.GzipFile(mode="wb", fileobj=sys.stdout.buffer)
             compressor.write(result.encode())
         else:
             print(result)

     def _section(self, name=None):
-        if self._sections[-1]['section'] != name:
-            self._sections.append({'section': name, 'data': {}})
+        if self._sections[-1]["section"] != name:
+            self._sections.append({"section": name, "data": {}})

-        return self._sections[-1]['data']
+        return self._sections[-1]["data"]

     def _dump_json(self):
         """
@@ -669,85 +685,85 @@ class DiagnosticsData:
         """

         def _write_title(buffer, value):
-            buffer.write(f'### {value}\n')
+            buffer.write(f"### {value}\n")

         def _write_subtitle(buffer, value):
-            buffer.write(f'#### {value}\n')
+            buffer.write(f"#### {value}\n")

         def _write_string_item(buffer, name, item):
-            value = item['value']
-            if value != '':
-                value = f'**{value}**'
-            buffer.write(f'{name}: {value}\n')
+            value = item["value"]
+            if value != "":
+                value = f"**{value}**"
+            buffer.write(f"{name}: {value}\n")

         def _write_xml_item(buffer, section_name, name, item):
             if section_name:
-                buffer.write(f'##### {name}\n')
+                buffer.write(f"##### {name}\n")
             else:
                 _write_subtitle(buffer, name)

-            _write_result(buffer, item['value'], format='XML')
+            _write_result(buffer, item["value"], format="XML")

         def _write_query_item(buffer, section_name, name, item):
             if section_name:
-                buffer.write(f'##### {name}\n')
+                buffer.write(f"##### {name}\n")
             else:
                 _write_subtitle(buffer, name)

-            _write_query(buffer, item['query'])
-            _write_result(buffer, item['result'])
+            _write_query(buffer, item["query"])
+            _write_result(buffer, item["result"])

         def _write_command_item(buffer, section_name, name, item):
             if section_name:
-                buffer.write(f'##### {name}\n')
+                buffer.write(f"##### {name}\n")
             else:
                 _write_subtitle(buffer, name)

-            _write_command(buffer, item['command'])
-            _write_result(buffer, item['result'])
+            _write_command(buffer, item["command"])
+            _write_result(buffer, item["result"])

         def _write_unknown_item(buffer, section_name, name, item):
             if section_name:
-                buffer.write(f'**{name}**\n')
+                buffer.write(f"**{name}**\n")
             else:
                 _write_subtitle(buffer, name)

             json.dump(item, buffer, indent=2)

         def _write_query(buffer, query):
-            buffer.write('**query**\n')
-            buffer.write('```sql\n')
+            buffer.write("**query**\n")
+            buffer.write("```sql\n")
             buffer.write(query)
-            buffer.write('\n```\n')
+            buffer.write("\n```\n")

         def _write_command(buffer, command):
-            buffer.write('**command**\n')
-            buffer.write('```\n')
+            buffer.write("**command**\n")
+            buffer.write("```\n")
             buffer.write(command)
-            buffer.write('\n```\n')
+            buffer.write("\n```\n")

         def _write_result(buffer, result, format=None):
-            buffer.write('**result**\n')
-            buffer.write(f'```{format}\n' if format else '```\n')
+            buffer.write("**result**\n")
+            buffer.write(f"```{format}\n" if format else "```\n")
             buffer.write(result)
-            buffer.write('\n```\n')
+            buffer.write("\n```\n")

         buffer = io.StringIO()

-        _write_title(buffer, f'Diagnostics data for host {self.host}')
+        _write_title(buffer, f"Diagnostics data for host {self.host}")
         for section in self._sections:
-            section_name = section['section']
+            section_name = section["section"]
             if section_name:
                 _write_subtitle(buffer, section_name)

-            for name, item in section['data'].items():
-                if item['type'] == 'string':
+            for name, item in section["data"].items():
+                if item["type"] == "string":
                     _write_string_item(buffer, name, item)
-                elif item['type'] == 'query':
+                elif item["type"] == "query":
                     _write_query_item(buffer, section_name, name, item)
-                elif item['type'] == 'command':
+                elif item["type"] == "command":
                     _write_command_item(buffer, section_name, name, item)
-                elif item['type'] == 'xml':
+                elif item["type"] == "xml":
                     _write_xml_item(buffer, section_name, name, item)
|
||||||
else:
|
else:
|
||||||
_write_unknown_item(buffer, section_name, name, item)
|
_write_unknown_item(buffer, section_name, name, item)
|
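For a sense of what these nested writers emit: a query item named "Uptime" stored outside any section renders roughly like this (the query and result values are invented):

#### Uptime
**query**
```sql
SELECT uptime()
```
**result**
```
3600
```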
@@ -760,126 +776,196 @@ def main():
    Program entry point.
    """
    args = parse_args()
    timestamp = datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S")
    client = ClickhouseClient(
        host=args.host, port=args.port, user=args.user, password=args.password
    )
    ch_config = ClickhouseConfig.load()
    version = client.clickhouse_version
    system_tables = [
        row[0]
        for row in execute_query(client, SELECT_SYSTEM_TABLES, format="JSONCompact")[
            "data"
        ]
    ]

    diagnostics = DiagnosticsData(args)
    diagnostics.add_string("Version", version)
    diagnostics.add_string("Timestamp", timestamp)
    diagnostics.add_string("Uptime", execute_query(client, SELECT_UPTIME))

    diagnostics.add_xml_document("ClickHouse configuration", ch_config.dump())

    if version_ge(version, "20.8"):
        add_query(
            diagnostics,
            "Access configuration",
            client=client,
            query=SELECT_ACCESS,
            format="TSVRaw",
        )
        add_query(
            diagnostics,
            "Quotas",
            client=client,
            query=SELECT_QUOTA_USAGE,
            format="Vertical",
        )

    add_query(
        diagnostics,
        "Database engines",
        client=client,
        query=SELECT_DATABASE_ENGINES,
        format="PrettyCompactNoEscapes",
        section="Schema",
    )
    add_query(
        diagnostics,
        "Databases (top 10 by size)",
        client=client,
        query=SELECT_DATABASES,
        format="PrettyCompactNoEscapes",
        section="Schema",
    )
    add_query(
        diagnostics,
        "Table engines",
        client=client,
        query=SELECT_TABLE_ENGINES,
        format="PrettyCompactNoEscapes",
        section="Schema",
    )
    add_query(
        diagnostics,
        "Dictionaries",
        client=client,
        query=SELECT_DICTIONARIES,
        format="PrettyCompactNoEscapes",
        section="Schema",
    )

    add_query(
        diagnostics,
        "Replicated tables (top 10 by absolute delay)",
        client=client,
        query=SELECT_REPLICAS,
        format="PrettyCompactNoEscapes",
        section="Replication",
    )
    add_query(
        diagnostics,
        "Replication queue (top 20 oldest tasks)",
        client=client,
        query=SELECT_REPLICATION_QUEUE,
        format="Vertical",
        section="Replication",
    )
    if version_ge(version, "21.3"):
        add_query(
            diagnostics,
            "Replicated fetches",
            client=client,
            query=SELECT_REPLICATED_FETCHES,
            format="Vertical",
            section="Replication",
        )

    add_query(
        diagnostics,
        "Top 10 tables by max parts per partition",
        client=client,
        query=SELECT_PARTS_PER_TABLE,
        format="PrettyCompactNoEscapes",
    )
    add_query(
        diagnostics,
        "Merges in progress",
        client=client,
        query=SELECT_MERGES,
        format="Vertical",
    )
    add_query(
        diagnostics,
        "Mutations in progress",
        client=client,
        query=SELECT_MUTATIONS,
        format="Vertical",
    )
    add_query(
        diagnostics,
        "Recent data parts (modification time within last 3 minutes)",
        client=client,
        query=SELECT_RECENT_DATA_PARTS,
        format="Vertical",
    )

    add_query(
        diagnostics,
        "system.detached_parts",
        client=client,
        query=SELECT_DETACHED_DATA_PARTS,
        format="PrettyCompactNoEscapes",
        section="Detached data",
    )
    add_command(
        diagnostics,
        "Disk space usage",
        command="du -sh -L -c /var/lib/clickhouse/data/*/*/detached/* | sort -rsh",
        section="Detached data",
    )

    add_query(
        diagnostics,
        "Queries in progress (process list)",
        client=client,
        query=SELECT_PROCESSES,
        format="Vertical",
        section="Queries",
    )
    add_query(
        diagnostics,
        "Top 10 queries by duration",
        client=client,
        query=SELECT_TOP_QUERIES_BY_DURATION,
        format="Vertical",
        section="Queries",
    )
    add_query(
        diagnostics,
        "Top 10 queries by memory usage",
        client=client,
        query=SELECT_TOP_QUERIES_BY_MEMORY_USAGE,
        format="Vertical",
        section="Queries",
    )
    add_query(
        diagnostics,
        "Last 10 failed queries",
        client=client,
        query=SELECT_FAILED_QUERIES,
        format="Vertical",
        section="Queries",
    )

    add_query(
        diagnostics,
        "Stack traces",
        client=client,
        query=SELECT_STACK_TRACES,
        format="Vertical",
    )

    if "crash_log" in system_tables:
        add_query(
            diagnostics,
            "Crash log",
            client=client,
            query=SELECT_CRASH_LOG,
            format="Vertical",
        )

    add_command(diagnostics, "uname", "uname -a")

    diagnostics.dump(args.format)
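A typical invocation of the resulting tool looks like this (the script filename is an assumption; the flags are the ones registered in parse_args() below):

python3 clickhouse-diagnostics --host localhost --port 8123 --user default --format wiki.gz > report.wiki.gz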
@@ -889,29 +975,34 @@ def parse_args():
    Parse command-line arguments.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--format",
        choices=["json", "yaml", "json.gz", "yaml.gz", "wiki", "wiki.gz"],
        default="wiki",
    )
    parser.add_argument("--normalize-queries", action="store_true", default=False)
    parser.add_argument("--host", dest="host", help="clickhouse host")
    parser.add_argument(
        "--port", dest="port", default=8123, help="clickhouse http port"
    )
    parser.add_argument(
        "--user", dest="user", default="default", help="clickhouse user"
    )
    parser.add_argument("--password", dest="password", help="clickhouse password")
    return parser.parse_args()


def add_query(diagnostics, name, client, query, format, section=None):
    query_args = {
        "normalize_queries": diagnostics.args.normalize_queries,
    }
    query = client.render_query(query, **query_args)
    diagnostics.add_query(
        name=name,
        query=query,
        result=execute_query(client, query, render_query=False, format=format),
        section=section,
    )


def execute_query(client, query, render_query=True, format=None):
@@ -926,14 +1017,18 @@ def execute_query(client, query, render_query=True, format=None):

def add_command(diagnostics, name, command, section=None):
    diagnostics.add_command(
        name=name, command=command, result=execute_command(command), section=section
    )


def execute_command(command, input=None):
    proc = subprocess.Popen(
        command,
        shell=True,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    if isinstance(input, str):
        input = input.encode()
@@ -941,7 +1036,7 @@ def execute_command(command, input=None):
    stdout, stderr = proc.communicate(input=input)

    if proc.returncode:
        return f"failed with exit code {proc.returncode}\n{stderr.decode()}"

    return stdout.decode()
@@ -957,8 +1052,8 @@ def parse_version(version):
    """
    Parse version string.
    """
    return [int(x) for x in version.strip().split(".") if x.isnumeric()]


if __name__ == "__main__":
    main()
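For reference, parse_version above behaves as follows; version_ge is not part of this hunk, so the comment about it is an assumption based on how main() calls it:

parse_version("21.8.10.19")  # -> [21, 8, 10, 19]
parse_version("20.8")        # -> [20, 8]
# version_ge("21.8.10.19", "20.8") presumably compares such lists -> True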
@@ -28,39 +28,48 @@ class S3API(object):
        bucket = self.connection.get_bucket(bucket_name)
        key = bucket.initiate_multipart_upload(s3_path)
        logging.info("Will upload to s3 path %s", s3_path)
        chunksize = 1024 * 1024 * 1024  # 1 GB
        filesize = os.stat(file_path).st_size
        logging.info("File size is %s", filesize)
        chunkcount = int(math.ceil(filesize / chunksize))

        def call_back(x, y):
            print("Uploaded {}/{} bytes".format(x, y))

        try:
            for i in range(chunkcount + 1):
                logging.info("Uploading chunk %s of %s", i, chunkcount + 1)
                offset = chunksize * i
                bytes_size = min(chunksize, filesize - offset)

                with open(file_path, "r") as fp:
                    fp.seek(offset)
                    key.upload_part_from_file(
                        fp=fp, part_num=i + 1, size=bytes_size, cb=call_back, num_cb=100
                    )
            key.complete_upload()
        except Exception as ex:
            key.cancel_upload()
            raise ex
        logging.info("Contents were set")
        return "https://{bucket}.{mds_url}/{path}".format(
            bucket=bucket_name, mds_url=self.mds_url, path=s3_path
        )
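A quick sanity check of the chunking arithmetic in the multipart method above, with an invented file size:

import math

chunksize = 1024 * 1024 * 1024  # 1 GiB, as above
filesize = 3_500_000_000  # hypothetical ~3.5 GB object
chunkcount = int(math.ceil(filesize / chunksize))  # -> 4
# the loop above then issues parts 1..chunkcount + 1, each capped at
# min(chunksize, filesize - offset) bytes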
def set_file_contents(self, bucket, local_file_path, s3_file_path):
|
def set_file_contents(self, bucket, local_file_path, s3_file_path):
|
||||||
key = Key(bucket)
|
key = Key(bucket)
|
||||||
key.key = s3_file_path
|
key.key = s3_file_path
|
||||||
file_size = os.stat(local_file_path).st_size
|
file_size = os.stat(local_file_path).st_size
|
||||||
logging.info("Uploading file `%s` to `%s`. Size is %s", local_file_path, s3_file_path, file_size)
|
logging.info(
|
||||||
|
"Uploading file `%s` to `%s`. Size is %s",
|
||||||
|
local_file_path,
|
||||||
|
s3_file_path,
|
||||||
|
file_size,
|
||||||
|
)
|
||||||
|
|
||||||
def call_back(x, y):
|
def call_back(x, y):
|
||||||
print("Uploaded {}/{} bytes".format(x, y))
|
print("Uploaded {}/{} bytes".format(x, y))
|
||||||
|
|
||||||
key.set_contents_from_filename(local_file_path, cb=call_back)
|
key.set_contents_from_filename(local_file_path, cb=call_back)
|
||||||
|
|
||||||
def upload_data_for_static_files_disk(self, bucket_name, directory_path, s3_path):
|
def upload_data_for_static_files_disk(self, bucket_name, directory_path, s3_path):
|
||||||
@@ -74,12 +83,14 @@ class S3API(object):
            path = root.split(os.sep)
            for file in files:
                local_file_path = os.path.join(root, file)
                s3_file = local_file_path[len(directory_path) + 1 :]
                s3_file_path = os.path.join(s3_path, s3_file)
                self.set_file_contents(bucket, local_file_path, s3_file_path)

        logging.info("Uploading finished")
        return "https://{bucket}.{mds_url}/{path}".format(
            bucket=bucket_name, mds_url=self.mds_url, path=s3_path
        )

    def list_bucket_keys(self, bucket_name):
        bucket = self.connection.get_bucket(bucket_name)
@@ -91,100 +102,121 @@ class S3API(object):
        bucket.get_all_keys()
        for obj in bucket.get_all_keys():
            if obj.key.startswith(folder_path):
                print("Removing " + obj.key)
                obj.delete()


def make_tar_file_for_table(clickhouse_data_path, db_name, table_name, tmp_prefix):

    relative_data_path = os.path.join("data", db_name, table_name)
    relative_meta_path = os.path.join("metadata", db_name, table_name + ".sql")
    path_to_data = os.path.join(clickhouse_data_path, relative_data_path)
    path_to_metadata = os.path.join(clickhouse_data_path, relative_meta_path)
    temporary_file_name = tmp_prefix + "/{tname}.tar".format(tname=table_name)
    with tarfile.open(temporary_file_name, "w") as bundle:
        bundle.add(path_to_data, arcname=relative_data_path)
        bundle.add(path_to_metadata, arcname=relative_meta_path)
    return temporary_file_name


USAGE_EXAMPLES = """
examples:
\t./s3uploader --dataset-name some_ds --access-key-id XXX --secret-access-key YYY --clickhouse-data-path /opt/clickhouse/ --table-name default.some_tbl --bucket-name some-bucket
\t./s3uploader --dataset-name some_ds --access-key-id XXX --secret-access-key YYY --file-path some_ds.tsv.xz --bucket-name some-bucket --s3-path /path/to/
"""

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")

    parser = argparse.ArgumentParser(
        description="Simple tool for uploading datasets to clickhouse S3",
        usage="%(prog)s [options] {}".format(USAGE_EXAMPLES),
    )
    parser.add_argument("--s3-api-url", default="s3.amazonaws.com")
    parser.add_argument("--s3-common-url", default="s3.amazonaws.com")
    parser.add_argument("--bucket-name", default="clickhouse-datasets")
    parser.add_argument(
        "--dataset-name",
        required=True,
        help="Name of dataset, will be used in uploaded path",
    )
    parser.add_argument("--access-key-id", required=True)
    parser.add_argument("--secret-access-key", required=True)
    parser.add_argument(
        "--clickhouse-data-path",
        default="/var/lib/clickhouse/",
        help="Path to clickhouse database on filesystem",
    )
    parser.add_argument("--s3-path", help="Path in s3, where to upload file")
    parser.add_argument(
        "--tmp-prefix", default="/tmp", help="Prefix to store temporary downloaded file"
    )
    data_group = parser.add_mutually_exclusive_group(required=True)
    table_name_argument = data_group.add_argument(
        "--table-name",
        help="Name of table with database, if you are uploading partitions",
    )
    data_group.add_argument("--file-path", help="Name of file, if you are uploading")
    data_group.add_argument(
        "--directory-path", help="Path to directory with files to upload"
    )
    data_group.add_argument(
        "--list-directory", help="List s3 directory by --directory-path"
    )
    data_group.add_argument(
        "--remove-directory", help="Remove s3 directory by --directory-path"
    )
    args = parser.parse_args()

    if args.table_name is not None and args.clickhouse_data_path is None:
        raise argparse.ArgumentError(
            table_name_argument,
            "You should specify --clickhouse-data-path to upload --table",
        )

    s3_conn = S3API(
        args.access_key_id, args.secret_access_key, args.s3_api_url, args.s3_common_url
    )

    file_path = ""
    directory_path = args.directory_path
    s3_path = args.s3_path

    if args.list_directory:
        s3_conn.list_bucket_keys(args.bucket_name)
    elif args.remove_directory:
        print("Removing s3 path: " + args.remove_directory)
        s3_conn.remove_folder_from_bucket(args.bucket_name, args.remove_directory)
    elif args.directory_path is not None:
        url = s3_conn.upload_data_for_static_files_disk(
            args.bucket_name, directory_path, s3_path
        )
        logging.info("Data uploaded: %s", url)
    else:

        if args.table_name is not None:
            if "." not in args.table_name:
                db_name = "default"
            else:
                db_name, table_name = args.table_name.split(".")
            file_path = make_tar_file_for_table(
                args.clickhouse_data_path, db_name, table_name, args.tmp_prefix
            )
        else:
            file_path = args.file_path

        if "tsv" in file_path:
            s3_path = os.path.join(
                args.dataset_name, "tsv", os.path.basename(file_path)
            )
        if args.table_name is not None:
            s3_path = os.path.join(
                args.dataset_name, "partitions", os.path.basename(file_path)
            )
        elif args.s3_path is not None:
            s3_path = os.path.join(
                args.dataset_name, args.s3_path, os.path.basename(file_path)
            )
        else:
            raise Exception("Don't know s3-path to upload")
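Tracing the path construction above: uploading partitions of table "default.hits" with --dataset-name hits against the default bucket yields a URL of roughly this shape (the host follows the "https://{bucket}.{mds_url}/{path}" template; the concrete values are illustrative):

# /tmp/hits.tar -> s3_path = "hits/partitions/hits.tar"
# https://clickhouse-datasets.s3.amazonaws.com/hits/partitions/hits.tar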
@@ -11,13 +11,14 @@ from termcolor import colored
import sys

COLORMAP = {
    "success": colored("success", "green"),
    "failure": colored("failure", "red"),
    "error": colored("error", "red"),
    "pending": colored("pending", "yellow"),
    "not run": colored("not run", "white"),
}


def _filter_statuses(statuses):
    """
    Squash statuses to latest state
@@ -69,7 +70,7 @@ if __name__ == "__main__":
    date_since = datetime.datetime.strptime(args.since, "%Y-%m-%d %H:%M:%S")

    gh = Github(args.token)
    repo = gh.get_repo("ClickHouse/ClickHouse")
    commits = get_commits(repo, date_since)

    longest_header = []
@@ -101,6 +102,6 @@ if __name__ == "__main__":
        result_data.append(current_result)

    if sys.stdout.isatty():
        longest_header = [colored(h, "white", attrs=["bold"]) for h in longest_header]

    print(tabulate.tabulate(result_data, headers=longest_header, tablefmt="grid"))
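The changes in this commit are purely mechanical; re-running the formatter over the touched scripts (the path is an assumption, since file names are not preserved in this mirror) would reproduce them:

python3 -m black utils/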