apply black formatter

This commit is contained in:
parent 21f5d20b9e
commit 0ee8dfad53
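black is an opinionated Python code formatter, and the hunks below are its mechanical output over several ClickHouse test scripts. As a rough illustration of the kind of rewriting it performs (a hypothetical snippet, not code taken from this commit): single-quoted strings become double-quoted, and calls that exceed the line-length limit are split one argument per line.

# Hypothetical before/after illustration of black's style; it only uses the
# standard library and is not part of the files touched by this commit.
import argparse

# Before formatting, a call like this is typically written on one long line:
#   parser.add_argument('--dataset-names', required=True, nargs='+', help='datasets to fetch')
parser = argparse.ArgumentParser(description="Illustration of black-formatted code")
parser.add_argument(
    "--dataset-names",
    required=True,
    nargs="+",
    help="datasets to fetch",
)
args = parser.parse_args(["--dataset-names", "hits"])
print(args.dataset_names)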
@@ -10,31 +10,38 @@ import requests
import tempfile


DEFAULT_URL = 'https://clickhouse-datasets.s3.amazonaws.com'
DEFAULT_URL = "https://clickhouse-datasets.s3.amazonaws.com"

AVAILABLE_DATASETS = {
'hits': 'hits_v1.tar',
'visits': 'visits_v1.tar',
"hits": "hits_v1.tar",
"visits": "visits_v1.tar",
}

RETRIES_COUNT = 5


def _get_temp_file_name():
return os.path.join(tempfile._get_default_tempdir(), next(tempfile._get_candidate_names()))
return os.path.join(
tempfile._get_default_tempdir(), next(tempfile._get_candidate_names())
)


def build_url(base_url, dataset):
return os.path.join(base_url, dataset, 'partitions', AVAILABLE_DATASETS[dataset])
return os.path.join(base_url, dataset, "partitions", AVAILABLE_DATASETS[dataset])


def dowload_with_progress(url, path):
logging.info("Downloading from %s to temp path %s", url, path)
for i in range(RETRIES_COUNT):
try:
with open(path, 'wb') as f:
with open(path, "wb") as f:
response = requests.get(url, stream=True)
response.raise_for_status()
total_length = response.headers.get('content-length')
total_length = response.headers.get("content-length")
if total_length is None or int(total_length) == 0:
logging.info("No content-length, will download file without progress")
logging.info(
"No content-length, will download file without progress"
)
f.write(response.content)
else:
dl = 0
@@ -46,7 +53,11 @@ def dowload_with_progress(url, path):
if sys.stdout.isatty():
done = int(50 * dl / total_length)
percent = int(100 * float(dl) / total_length)
sys.stdout.write("\r[{}{}] {}%".format('=' * done, ' ' * (50-done), percent))
sys.stdout.write(
"\r[{}{}] {}%".format(
"=" * done, " " * (50 - done), percent
)
)
sys.stdout.flush()
break
except Exception as ex:
@@ -56,14 +67,21 @@ def dowload_with_progress(url, path):
if os.path.exists(path):
os.remove(path)
else:
raise Exception("Cannot download dataset from {}, all retries exceeded".format(url))
raise Exception(
"Cannot download dataset from {}, all retries exceeded".format(url)
)

sys.stdout.write("\n")
logging.info("Downloading finished")


def unpack_to_clickhouse_directory(tar_path, clickhouse_path):
logging.info("Will unpack data from temp path %s to clickhouse db %s", tar_path, clickhouse_path)
with tarfile.open(tar_path, 'r') as comp_file:
logging.info(
"Will unpack data from temp path %s to clickhouse db %s",
tar_path,
clickhouse_path,
)
with tarfile.open(tar_path, "r") as comp_file:
comp_file.extractall(path=clickhouse_path)
logging.info("Unpack finished")

@@ -72,15 +90,21 @@ if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)

parser = argparse.ArgumentParser(
description="Simple tool for dowloading datasets for clickhouse from S3")
description="Simple tool for dowloading datasets for clickhouse from S3"
)

parser.add_argument('--dataset-names', required=True, nargs='+', choices=list(AVAILABLE_DATASETS.keys()))
parser.add_argument('--url-prefix', default=DEFAULT_URL)
parser.add_argument('--clickhouse-data-path', default='/var/lib/clickhouse/')
parser.add_argument(
"--dataset-names",
required=True,
nargs="+",
choices=list(AVAILABLE_DATASETS.keys()),
)
parser.add_argument("--url-prefix", default=DEFAULT_URL)
parser.add_argument("--clickhouse-data-path", default="/var/lib/clickhouse/")

args = parser.parse_args()
datasets = args.dataset_names
logging.info("Will fetch following datasets: %s", ', '.join(datasets))
logging.info("Will fetch following datasets: %s", ", ".join(datasets))
for dataset in datasets:
logging.info("Processing %s", dataset)
temp_archive_path = _get_temp_file_name()
@@ -92,10 +116,11 @@ if __name__ == "__main__":
logging.info("Some exception occured %s", str(ex))
raise
finally:
logging.info("Will remove downloaded file %s from filesystem if it exists", temp_archive_path)
logging.info(
"Will remove downloaded file %s from filesystem if it exists",
temp_archive_path,
)
if os.path.exists(temp_archive_path):
os.remove(temp_archive_path)
logging.info("Processing of %s finished", dataset)
logging.info("Fetch finished, enjoy your tables!")

@@ -77,7 +77,7 @@ def trim_for_log(s):
return s
lines = s.splitlines()
if len(lines) > 10000:
separator = "-" * 40 + str(len(lines) - 10000) + " lines are hidden" + "-" * 40
separator = "-" * 40 + str(len(lines) - 10000) + " lines are hidden" + "-" * 40
return "\n".join(lines[:5000] + [] + [separator] + [] + lines[-5000:])
else:
return "\n".join(lines)
@@ -95,7 +95,13 @@ class HTTPError(Exception):

# Helpers to execute queries via HTTP interface.
def clickhouse_execute_http(
base_args, query, timeout=30, settings=None, default_format=None, max_http_retries=5, retry_error_codes=False
base_args,
query,
timeout=30,
settings=None,
default_format=None,
max_http_retries=5,
retry_error_codes=False,
):
if args.secure:
client = http.client.HTTPSConnection(
@@ -146,12 +152,36 @@ def clickhouse_execute_http(

return data

def clickhouse_execute(base_args, query, timeout=30, settings=None, max_http_retries=5, retry_error_codes=False):
return clickhouse_execute_http(base_args, query, timeout, settings, max_http_retries=max_http_retries, retry_error_codes=retry_error_codes).strip()

def clickhouse_execute(
base_args,
query,
timeout=30,
settings=None,
max_http_retries=5,
retry_error_codes=False,
):
return clickhouse_execute_http(
base_args,
query,
timeout,
settings,
max_http_retries=max_http_retries,
retry_error_codes=retry_error_codes,
).strip()


def clickhouse_execute_json(base_args, query, timeout=60, settings=None, max_http_retries=5):
data = clickhouse_execute_http(base_args, query, timeout, settings, "JSONEachRow", max_http_retries=max_http_retries)
def clickhouse_execute_json(
base_args, query, timeout=60, settings=None, max_http_retries=5
):
data = clickhouse_execute_http(
base_args,
query,
timeout,
settings,
"JSONEachRow",
max_http_retries=max_http_retries,
)
if not data:
return None
rows = []
@@ -648,7 +678,9 @@ class TestCase:

clickhouse_execute(
args,
"CREATE DATABASE IF NOT EXISTS " + database + get_db_engine(testcase_args, database),
"CREATE DATABASE IF NOT EXISTS "
+ database
+ get_db_engine(testcase_args, database),
settings=get_create_database_settings(args, testcase_args),
)

@@ -831,7 +863,8 @@ class TestCase:

# TODO: remove checking "no-upgrade-check" after 23.1
elif args.upgrade_check and (
"no-upgrade-check" in tags or "no-upgrade-check" in tags):
"no-upgrade-check" in tags or "no-upgrade-check" in tags
):
return FailureReason.NO_UPGRADE_CHECK

elif tags and ("no-s3-storage" in tags) and args.s3_storage:
@@ -1051,7 +1084,11 @@ class TestCase:
@staticmethod
def send_test_name_failed(suite: str, case: str):
pid = os.getpid()
clickhouse_execute(args, f"SELECT 'Running test {suite}/{case} from pid={pid}'", retry_error_codes=True)
clickhouse_execute(
args,
f"SELECT 'Running test {suite}/{case} from pid={pid}'",
retry_error_codes=True,
)

def run_single_test(
self, server_logs_level, client_options
@@ -2220,6 +2257,7 @@ def find_binary(name):

raise Exception(f"{name} was not found in PATH")


def find_clickhouse_command(binary, command):
symlink = binary + "-" + command
if os.access(symlink, os.X_OK):
@@ -2228,6 +2266,7 @@ def find_clickhouse_command(binary, command):
# To avoid requiring symlinks (in case you download binary from CI)
return binary + " " + command


def get_additional_client_options(args):
if args.client_option:
return " ".join("--" + option for option in args.client_option)
@@ -2569,7 +2608,9 @@ if __name__ == "__main__":
"WARNING: --extract_from_config option is deprecated and will be removed the the future",
file=sys.stderr,
)
args.extract_from_config = find_clickhouse_command(args.binary, "extract-from-config")
args.extract_from_config = find_clickhouse_command(
args.binary, "extract-from-config"
)

if args.configclient:
args.client += " --config-file=" + args.configclient

@@ -243,11 +243,18 @@ if __name__ == "__main__":
)

parser.add_argument(
"--no-random", action="store", dest="no_random", help="Disable tests order randomization"
"--no-random",
action="store",
dest="no_random",
help="Disable tests order randomization",
)

parser.add_argument(
"--pre-pull", action="store_true", default=False, dest="pre_pull", help="Pull images for docker_compose before all other actions"
"--pre-pull",
action="store_true",
default=False,
dest="pre_pull",
help="Pull images for docker_compose before all other actions",
)

parser.add_argument(
@@ -306,7 +313,6 @@ if __name__ == "__main__":
# if not args.no_random:
# rand_args += f"--random-seed={os.getpid()}"


net = ""
if args.network:
net = "--net={}".format(args.network)
@@ -416,8 +422,11 @@ if __name__ == "__main__":
name=CONTAINER_NAME,
)

cmd = cmd_base + " " + args.command
cmd_pre_pull = cmd_base + " find /compose -name docker_compose_*.yml -exec docker-compose -f '{}' pull \;"
cmd = cmd_base + " " + args.command
cmd_pre_pull = (
cmd_base
+ " find /compose -name docker_compose_*.yml -exec docker-compose -f '{}' pull \;"
)

containers = subprocess.check_output(
f"docker ps --all --quiet --filter name={CONTAINER_NAME} --format={{{{.ID}}}}",

@@ -1,57 +1,72 @@
#!/usr/bin/env python3


def gen_queries():
create_template = 'create table tab_00386 (a Int8, b String, c Tuple(Int8), d Tuple(Tuple(Int8)), e Tuple(Int8, String), f Tuple(Tuple(Int8, String))) engine = MergeTree order by ({}) partition by {}'
drop_query = 'drop table if exists tab_00386'
values = ('1', "'a'", 'tuple(1)', 'tuple(tuple(1))', "(1, 'a')", "tuple((1, 'a'))")
create_template = "create table tab_00386 (a Int8, b String, c Tuple(Int8), d Tuple(Tuple(Int8)), e Tuple(Int8, String), f Tuple(Tuple(Int8, String))) engine = MergeTree order by ({}) partition by {}"
drop_query = "drop table if exists tab_00386"
values = ("1", "'a'", "tuple(1)", "tuple(tuple(1))", "(1, 'a')", "tuple((1, 'a'))")
insert_query = "insert into tab_00386 values (1, 'a', tuple(1), tuple(tuple(1)), (1, 'a'), tuple((1, 'a')))"
columns = tuple('a b c d'.split())
order_by_columns = tuple('a b c'.split())
partition_by_columns = tuple(' tuple() a'.split())
columns = tuple("a b c d".split())
order_by_columns = tuple("a b c".split())
partition_by_columns = tuple(" tuple() a".split())

for partition in partition_by_columns:
for key_mask in range(1, 1 << len(order_by_columns)):
key = ','.join(order_by_columns[i] for i in range(len(order_by_columns)) if (1 << i) & key_mask != 0)
key = ",".join(
order_by_columns[i]
for i in range(len(order_by_columns))
if (1 << i) & key_mask != 0
)
create_query = create_template.format(key, partition)
for q in (drop_query, create_query, insert_query):
yield q

for column, value in zip(columns, values):
yield 'select {} in {} from tab_00386'.format(column, value)
yield 'select {} in tuple({}) from tab_00386'.format(column, value)
yield 'select {} in (select {} from tab_00386) from tab_00386'.format(column, column)
yield "select {} in {} from tab_00386".format(column, value)
yield "select {} in tuple({}) from tab_00386".format(column, value)
yield "select {} in (select {} from tab_00386) from tab_00386".format(
column, column
)

for i in range(len(columns)):
for j in range(i, len(columns)):
yield 'select ({}, {}) in tuple({}, {}) from tab_00386'.format(columns[i], columns[j], values[i], values[j])
yield 'select ({}, {}) in (select {}, {} from tab_00386) from tab_00386'.format(columns[i], columns[j], columns[i], columns[j])
yield 'select ({}, {}) in (select ({}, {}) from tab_00386) from tab_00386'.format(columns[i], columns[j], columns[i], columns[j])
yield "select ({}, {}) in tuple({}, {}) from tab_00386".format(
columns[i], columns[j], values[i], values[j]
)
yield "select ({}, {}) in (select {}, {} from tab_00386) from tab_00386".format(
columns[i], columns[j], columns[i], columns[j]
)
yield "select ({}, {}) in (select ({}, {}) from tab_00386) from tab_00386".format(
columns[i], columns[j], columns[i], columns[j]
)

yield "select e in (1, 'a') from tab_00386"
yield "select f in tuple((1, 'a')) from tab_00386"
yield "select f in tuple(tuple((1, 'a'))) from tab_00386"

yield 'select e in (select a, b from tab_00386) from tab_00386'
yield 'select e in (select (a, b) from tab_00386) from tab_00386'
yield 'select f in (select tuple((a, b)) from tab_00386) from tab_00386'
yield 'select tuple(f) in (select tuple(tuple((a, b))) from tab_00386) from tab_00386'
yield "select e in (select a, b from tab_00386) from tab_00386"
yield "select e in (select (a, b) from tab_00386) from tab_00386"
yield "select f in (select tuple((a, b)) from tab_00386) from tab_00386"
yield "select tuple(f) in (select tuple(tuple((a, b))) from tab_00386) from tab_00386"


import requests
import os


def main():
url = os.environ['CLICKHOUSE_URL']
url = os.environ["CLICKHOUSE_URL"]

for q in gen_queries():
resp = requests.post(url, data=q)
if resp.status_code != 200 or resp.text.strip() not in ('1', ''):
print('Query:', q)
print('Code:', resp.status_code)
if resp.status_code != 200 or resp.text.strip() not in ("1", ""):
print("Query:", q)
print("Code:", resp.status_code)
print(resp.text)
break

requests.post(url, data='drop table tab_00386')
requests.post(url, data="drop table tab_00386")


if __name__ == "__main__":
main()

@@ -2,8 +2,20 @@

import os, itertools, urllib.request, urllib.parse, urllib.error, urllib.request, urllib.error, urllib.parse, sys


def get_ch_answer(query):
return urllib.request.urlopen(os.environ.get('CLICKHOUSE_URL', 'http://localhost:' + os.environ.get('CLICKHOUSE_PORT_HTTP', '8123') ), data=query.encode()).read().decode()
return (
urllib.request.urlopen(
os.environ.get(
"CLICKHOUSE_URL",
"http://localhost:" + os.environ.get("CLICKHOUSE_PORT_HTTP", "8123"),
),
data=query.encode(),
)
.read()
.decode()
)


def check_answers(query, answer):
ch_answer = get_ch_answer(query)
@@ -13,36 +25,34 @@ def check_answers(query, answer):
print("Fetched answer :", ch_answer)
exit(-1)


def get_values():
values = [0, 1, -1]
for bits in [8, 16, 32, 64]:
values += [2**bits, 2**bits - 1]
values += [2**(bits-1) - 1, 2**(bits-1), 2**(bits-1) + 1]
values += [-2**(bits-1) - 1, -2**(bits-1), -2**(bits-1) + 1]
values += [2 ** (bits - 1) - 1, 2 ** (bits - 1), 2 ** (bits - 1) + 1]
values += [-(2 ** (bits - 1)) - 1, -(2 ** (bits - 1)), -(2 ** (bits - 1)) + 1]
return values


def is_valid_integer(x):
return -2**63 <= x and x <= 2**64-1
return -(2**63) <= x and x <= 2**64 - 1


TEST_WITH_CASTING=True
GENERATE_TEST_FILES=False
TEST_WITH_CASTING = True
GENERATE_TEST_FILES = False

TYPES = {
"UInt8" : { "bits" : 8, "sign" : False, "float" : False },
"Int8" : { "bits" : 8, "sign" : True, "float" : False },

"UInt16": { "bits" : 16, "sign" : False, "float" : False },
"Int16" : { "bits" : 16, "sign" : True, "float" : False },

"UInt32": { "bits" : 32, "sign" : False, "float" : False },
"Int32" : { "bits" : 32, "sign" : True, "float" : False },

"UInt64": { "bits" : 64, "sign" : False, "float" : False },
"Int64" : { "bits" : 64, "sign" : True, "float" : False }

#"Float32" : { "bits" : 32, "sign" : True, "float" : True },
#"Float64" : { "bits" : 64, "sign" : True, "float" : True }
"UInt8": {"bits": 8, "sign": False, "float": False},
"Int8": {"bits": 8, "sign": True, "float": False},
"UInt16": {"bits": 16, "sign": False, "float": False},
"Int16": {"bits": 16, "sign": True, "float": False},
"UInt32": {"bits": 32, "sign": False, "float": False},
"Int32": {"bits": 32, "sign": True, "float": False},
"UInt64": {"bits": 64, "sign": False, "float": False},
"Int64": {"bits": 64, "sign": True, "float": False}
# "Float32" : { "bits" : 32, "sign" : True, "float" : True },
# "Float64" : { "bits" : 64, "sign" : True, "float" : True }
}


@@ -55,14 +65,18 @@ def inside_range(value, type_name):
return True

if signed:
return -2**(bits-1) <= value and value <= 2**(bits-1) - 1
return -(2 ** (bits - 1)) <= value and value <= 2 ** (bits - 1) - 1
else:
return 0 <= value and value <= 2**bits - 1


def test_operators(v1, v2, v1_passed, v2_passed):
query_str = "{v1} = {v2}, {v1} != {v2}, {v1} < {v2}, {v1} <= {v2}, {v1} > {v2}, {v1} >= {v2},\t".format(v1=v1_passed, v2=v2_passed)
query_str += "{v1} = {v2}, {v1} != {v2}, {v1} < {v2}, {v1} <= {v2}, {v1} > {v2}, {v1} >= {v2} ".format(v1=v2_passed, v2=v1_passed)
query_str = "{v1} = {v2}, {v1} != {v2}, {v1} < {v2}, {v1} <= {v2}, {v1} > {v2}, {v1} >= {v2},\t".format(
v1=v1_passed, v2=v2_passed
)
query_str += "{v1} = {v2}, {v1} != {v2}, {v1} < {v2}, {v1} <= {v2}, {v1} > {v2}, {v1} >= {v2} ".format(
v1=v2_passed, v2=v1_passed
)

answers = [v1 == v2, v1 != v2, v1 < v2, v1 <= v2, v1 > v2, v1 >= v2]
answers += [v2 == v1, v2 != v1, v2 < v1, v2 <= v1, v2 > v1, v2 >= v1]
@@ -74,6 +88,7 @@ def test_operators(v1, v2, v1_passed, v2_passed):

VALUES = [x for x in get_values() if is_valid_integer(x)]


def test_pair(v1, v2):
query = "SELECT {}, {}, ".format(v1, v2)
answers = "{}\t{}\t".format(v1, v2)
@@ -87,19 +102,58 @@ def test_pair(v1, v2):
if inside_range(v1, t1):
for t2 in TYPES.keys():
if inside_range(v2, t2):
q, a = test_operators(v1, v2, 'to{}({})'.format(t1, v1), 'to{}({})'.format(t2, v2))
query += ', ' + q
q, a = test_operators(
v1, v2, "to{}({})".format(t1, v1), "to{}({})".format(t2, v2)
)
query += ", " + q
answers += "\t" + a

check_answers(query, answers)
return query, answers


VALUES_INT = [0, -1, 1, 2**64-1, 2**63, -2**63, 2**63-1, 2**51, 2**52, 2**53-1, 2**53, 2**53+1, 2**53+2, -2**53+1, -2**53, -2**53-1, -2**53-2, 2*52, -2**52]
VALUES_FLOAT = [float(x) for x in VALUES_INT + [-0.5, 0.5, -1.5, 1.5, 2**53, 2**51 - 0.5, 2**51 + 0.5, 2**60, -2**60, -2**63 - 10000, 2**63 + 10000]]
VALUES_INT = [
0,
-1,
1,
2**64 - 1,
2**63,
-(2**63),
2**63 - 1,
2**51,
2**52,
2**53 - 1,
2**53,
2**53 + 1,
2**53 + 2,
-(2**53) + 1,
-(2**53),
-(2**53) - 1,
-(2**53) - 2,
2 * 52,
-(2**52),
]
VALUES_FLOAT = [
float(x)
for x in VALUES_INT
+ [
-0.5,
0.5,
-1.5,
1.5,
2**53,
2**51 - 0.5,
2**51 + 0.5,
2**60,
-(2**60),
-(2**63) - 10000,
2**63 + 10000,
]
]


def test_float_pair(i, f):
f_str = ("%.9f" % f)
f_str = "%.9f" % f
query = "SELECT '{}', '{}', ".format(i, f_str)
answers = "{}\t{}\t".format(i, f_str)

@@ -110,8 +164,8 @@ def test_float_pair(i, f):
if TEST_WITH_CASTING:
for t1 in TYPES.keys():
if inside_range(i, t1):
q, a = test_operators(i, f, 'to{}({})'.format(t1, i), f_str)
query += ', ' + q
q, a = test_operators(i, f, "to{}({})".format(t1, i), f_str)
query += ", " + q
answers += "\t" + a

check_answers(query, answers)
@@ -120,22 +174,26 @@ def test_float_pair(i, f):

def main():
if GENERATE_TEST_FILES:
base_name = '00411_accurate_number_comparison'
sql_file = open(base_name + '.sql', 'wt')
ref_file = open(base_name + '.reference', 'wt')
base_name = "00411_accurate_number_comparison"
sql_file = open(base_name + ".sql", "wt")
ref_file = open(base_name + ".reference", "wt")

num_int_tests = len(list(itertools.combinations(VALUES, 2)))

num_parts = 4
for part in range(0, num_parts):
if 'int' + str(part + 1) in sys.argv[1:]:
for (v1, v2) in itertools.islice(itertools.combinations(VALUES, 2), part * num_int_tests // num_parts, (part + 1) * num_int_tests // num_parts):
if "int" + str(part + 1) in sys.argv[1:]:
for (v1, v2) in itertools.islice(
itertools.combinations(VALUES, 2),
part * num_int_tests // num_parts,
(part + 1) * num_int_tests // num_parts,
):
q, a = test_pair(v1, v2)
if GENERATE_TEST_FILES:
sql_file.write(q + ";\n")
ref_file.write(a + "\n")

if 'float' in sys.argv[1:]:
if "float" in sys.argv[1:]:
for (i, f) in itertools.product(VALUES_INT, VALUES_FLOAT):
q, a = test_float_pair(i, f)
if GENERATE_TEST_FILES:

@@ -12,6 +12,7 @@ import subprocess
from io import StringIO
from http.server import BaseHTTPRequestHandler, HTTPServer


def is_ipv6(host):
try:
socket.inet_aton(host)
@@ -19,6 +20,7 @@ def is_ipv6(host):
except:
return True


def get_local_port(host, ipv6):
if ipv6:
family = socket.AF_INET6
@@ -29,8 +31,9 @@ def get_local_port(host, ipv6):
fd.bind((host, 0))
return fd.getsockname()[1]

CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', '127.0.0.1')
CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123')

CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "127.0.0.1")
CLICKHOUSE_PORT_HTTP = os.environ.get("CLICKHOUSE_PORT_HTTP", "8123")

#####################################################################################
# This test starts an HTTP server and serves data to clickhouse url-engine based table.
@@ -39,27 +42,42 @@ CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123')
#####################################################################################

# IP-address of this host accessible from the outside world. Get the first one
HTTP_SERVER_HOST = subprocess.check_output(['hostname', '-i']).decode('utf-8').strip().split()[0]
HTTP_SERVER_HOST = (
subprocess.check_output(["hostname", "-i"]).decode("utf-8").strip().split()[0]
)
IS_IPV6 = is_ipv6(HTTP_SERVER_HOST)
HTTP_SERVER_PORT = get_local_port(HTTP_SERVER_HOST, IS_IPV6)

# IP address and port of the HTTP server started from this script.
HTTP_SERVER_ADDRESS = (HTTP_SERVER_HOST, HTTP_SERVER_PORT)
if IS_IPV6:
HTTP_SERVER_URL_STR = 'http://' + f'[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}' + "/"
HTTP_SERVER_URL_STR = (
"http://"
+ f"[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}"
+ "/"
)
else:
HTTP_SERVER_URL_STR = 'http://' + f'{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}' + "/"
HTTP_SERVER_URL_STR = (
"http://" + f"{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}" + "/"
)

CSV_DATA = os.path.join(
tempfile._get_default_tempdir(), next(tempfile._get_candidate_names())
)

CSV_DATA = os.path.join(tempfile._get_default_tempdir(), next(tempfile._get_candidate_names()))

def get_ch_answer(query):
host = CLICKHOUSE_HOST
if IS_IPV6:
host = f'[{host}]'
host = f"[{host}]"

url = os.environ.get('CLICKHOUSE_URL', 'http://{host}:{port}'.format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP))
url = os.environ.get(
"CLICKHOUSE_URL",
"http://{host}:{port}".format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP),
)
return urllib.request.urlopen(url, data=query.encode()).read().decode()


def check_answers(query, answer):
ch_answer = get_ch_answer(query)
if ch_answer.strip() != answer.strip():
@@ -68,18 +86,19 @@ def check_answers(query, answer):
print("Fetched answer :", ch_answer, file=sys.stderr)
raise Exception("Fail on query")


class CSVHTTPServer(BaseHTTPRequestHandler):
def _set_headers(self):
self.send_response(200)
self.send_header('Content-type', 'text/csv')
self.send_header("Content-type", "text/csv")
self.end_headers()

def do_GET(self):
self._set_headers()
with open(CSV_DATA, 'r') as fl:
reader = csv.reader(fl, delimiter=',')
with open(CSV_DATA, "r") as fl:
reader = csv.reader(fl, delimiter=",")
for row in reader:
self.wfile.write((', '.join(row) + '\n').encode())
self.wfile.write((", ".join(row) + "\n").encode())
return

def do_HEAD(self):
@@ -87,33 +106,33 @@ class CSVHTTPServer(BaseHTTPRequestHandler):
return

def read_chunk(self):
msg = ''
msg = ""
while True:
sym = self.rfile.read(1)
if sym == '':
if sym == "":
break
msg += sym.decode('utf-8')
if msg.endswith('\r\n'):
msg += sym.decode("utf-8")
if msg.endswith("\r\n"):
break
length = int(msg[:-2], 16)
if length == 0:
return ''
return ""
content = self.rfile.read(length)
self.rfile.read(2) # read sep \r\n
return content.decode('utf-8')
self.rfile.read(2)  # read sep \r\n
return content.decode("utf-8")

def do_POST(self):
data = ''
data = ""
while True:
chunk = self.read_chunk()
if not chunk:
break
data += chunk
with StringIO(data) as fl:
reader = csv.reader(fl, delimiter=',')
with open(CSV_DATA, 'a') as d:
reader = csv.reader(fl, delimiter=",")
with open(CSV_DATA, "a") as d:
for row in reader:
d.write(','.join(row) + '\n')
d.write(",".join(row) + "\n")
self._set_headers()
self.wfile.write(b"ok")

@@ -124,6 +143,7 @@ class CSVHTTPServer(BaseHTTPRequestHandler):
class HTTPServerV6(HTTPServer):
address_family = socket.AF_INET6


def start_server():
if IS_IPV6:
httpd = HTTPServerV6(HTTP_SERVER_ADDRESS, CSVHTTPServer)
@@ -133,49 +153,76 @@ def start_server():
t = threading.Thread(target=httpd.serve_forever)
return t, httpd


# test section

def test_select(table_name="", schema="str String,numuint UInt32,numint Int32,double Float64", requests=[], answers=[], test_data=""):
with open(CSV_DATA, 'w') as f: # clear file
f.write('')

def test_select(
table_name="",
schema="str String,numuint UInt32,numint Int32,double Float64",
requests=[],
answers=[],
test_data="",
):
with open(CSV_DATA, "w") as f:  # clear file
f.write("")

if test_data:
with open(CSV_DATA, 'w') as f:
with open(CSV_DATA, "w") as f:
f.write(test_data + "\n")

if table_name:
get_ch_answer("drop table if exists {}".format(table_name))
get_ch_answer("create table {} ({}) engine=URL('{}', 'CSV')".format(table_name, schema, HTTP_SERVER_URL_STR))
get_ch_answer(
"create table {} ({}) engine=URL('{}', 'CSV')".format(
table_name, schema, HTTP_SERVER_URL_STR
)
)

for i in range(len(requests)):
tbl = table_name
if not tbl:
tbl = "url('{addr}', 'CSV', '{schema}')".format(addr=HTTP_SERVER_URL_STR, schema=schema)
tbl = "url('{addr}', 'CSV', '{schema}')".format(
addr=HTTP_SERVER_URL_STR, schema=schema
)
check_answers(requests[i].format(tbl=tbl), answers[i])

if table_name:
get_ch_answer("drop table if exists {}".format(table_name))


def test_insert(table_name="", schema="str String,numuint UInt32,numint Int32,double Float64", requests_insert=[], requests_select=[], answers=[]):
with open(CSV_DATA, 'w') as f: # flush test file
f.write('')
def test_insert(
table_name="",
schema="str String,numuint UInt32,numint Int32,double Float64",
requests_insert=[],
requests_select=[],
answers=[],
):
with open(CSV_DATA, "w") as f:  # flush test file
f.write("")

if table_name:
get_ch_answer("drop table if exists {}".format(table_name))
get_ch_answer("create table {} ({}) engine=URL('{}', 'CSV')".format(table_name, schema, HTTP_SERVER_URL_STR))
get_ch_answer(
"create table {} ({}) engine=URL('{}', 'CSV')".format(
table_name, schema, HTTP_SERVER_URL_STR
)
)

for req in requests_insert:
tbl = table_name
if not tbl:
tbl = "table function url('{addr}', 'CSV', '{schema}')".format(addr=HTTP_SERVER_URL_STR, schema=schema)
tbl = "table function url('{addr}', 'CSV', '{schema}')".format(
addr=HTTP_SERVER_URL_STR, schema=schema
)
get_ch_answer(req.format(tbl=tbl))


for i in range(len(requests_select)):
tbl = table_name
if not tbl:
tbl = "url('{addr}', 'CSV', '{schema}')".format(addr=HTTP_SERVER_URL_STR, schema=schema)
tbl = "url('{addr}', 'CSV', '{schema}')".format(
addr=HTTP_SERVER_URL_STR, schema=schema
)
check_answers(requests_select[i].format(tbl=tbl), answers[i])

if table_name:
@@ -185,9 +232,11 @@ def test_insert(table_name="", schema="str String,numuint UInt32,numint Int32,do
def main():
test_data = "Hello,2,-2,7.7\nWorld,2,-5,8.8"
select_only_requests = {
"select str,numuint,numint,double from {tbl}" : test_data.replace(',', '\t'),
"select numuint, count(*) from {tbl} group by numuint" : "2\t2",
"select str,numuint,numint,double from {tbl} limit 1": test_data.split("\n")[0].replace(',', '\t'),
"select str,numuint,numint,double from {tbl}": test_data.replace(",", "\t"),
"select numuint, count(*) from {tbl} group by numuint": "2\t2",
"select str,numuint,numint,double from {tbl} limit 1": test_data.split("\n")[
0
].replace(",", "\t"),
}

insert_requests = [
@@ -196,21 +245,41 @@ def main():
]

select_requests = {
"select distinct numuint from {tbl} order by numuint": '\n'.join([str(i) for i in range(11)]),
"select count(*) from {tbl}": '12',
'select double, count(*) from {tbl} group by double order by double': "7.7\t2\n9.9\t10"
"select distinct numuint from {tbl} order by numuint": "\n".join(
[str(i) for i in range(11)]
),
"select count(*) from {tbl}": "12",
"select double, count(*) from {tbl} group by double order by double": "7.7\t2\n9.9\t10",
}

t, httpd = start_server()
t.start()
# test table with url engine
test_select(table_name="test_table_select", requests=list(select_only_requests.keys()), answers=list(select_only_requests.values()), test_data=test_data)
test_select(
table_name="test_table_select",
requests=list(select_only_requests.keys()),
answers=list(select_only_requests.values()),
test_data=test_data,
)
# test table function url
test_select(requests=list(select_only_requests.keys()), answers=list(select_only_requests.values()), test_data=test_data)
#test insert into table with url engine
test_insert(table_name="test_table_insert", requests_insert=insert_requests, requests_select=list(select_requests.keys()), answers=list(select_requests.values()))
#test insert into table function url
test_insert(requests_insert=insert_requests, requests_select=list(select_requests.keys()), answers=list(select_requests.values()))
test_select(
requests=list(select_only_requests.keys()),
answers=list(select_only_requests.values()),
test_data=test_data,
)
# test insert into table with url engine
test_insert(
table_name="test_table_insert",
requests_insert=insert_requests,
requests_select=list(select_requests.keys()),
answers=list(select_requests.values()),
)
# test insert into table function url
test_insert(
requests_insert=insert_requests,
requests_select=list(select_requests.keys()),
answers=list(select_requests.values()),
)

httpd.shutdown()
t.join()

@@ -12,35 +12,46 @@ HAYSTACKS = [

NEEDLE = "needle"

HAY_RE = re.compile(r'\bhay\b', re.IGNORECASE)
NEEDLE_RE = re.compile(r'\bneedle\b', re.IGNORECASE)
HAY_RE = re.compile(r"\bhay\b", re.IGNORECASE)
NEEDLE_RE = re.compile(r"\bneedle\b", re.IGNORECASE)


def replace_follow_case(replacement):
def func(match):
g = match.group()
if g.islower(): return replacement.lower()
if g.istitle(): return replacement.title()
if g.isupper(): return replacement.upper()
if g.islower():
return replacement.lower()
if g.istitle():
return replacement.title()
if g.isupper():
return replacement.upper()
return replacement

return func


def replace_separators(query, new_sep):
SEP_RE = re.compile('\\s+')
SEP_RE = re.compile("\\s+")
result = SEP_RE.sub(new_sep, query)
return result

def enlarge_haystack(query, times, separator=''):
return HAY_RE.sub(replace_follow_case(('hay' + separator) * times), query)

def enlarge_haystack(query, times, separator=""):
return HAY_RE.sub(replace_follow_case(("hay" + separator) * times), query)


def small_needle(query):
return NEEDLE_RE.sub(replace_follow_case('n'), query)
return NEEDLE_RE.sub(replace_follow_case("n"), query)


def remove_needle(query):
return NEEDLE_RE.sub('', query)
return NEEDLE_RE.sub("", query)


def replace_needle(query, new_needle):
return NEEDLE_RE.sub(new_needle, query)


# with str.lower, str.uppert, str.title and such
def transform_needle(query, string_transformation_func):
def replace_with_transformation(match):
@@ -49,19 +60,21 @@ def transform_needle(query, string_transformation_func):

return NEEDLE_RE.sub(replace_with_transformation, query)

def create_cases(case_sensitive_func, case_insensitive_func, table_row_template, table_query_template, const_query_template):

def create_cases(
case_sensitive_func,
case_insensitive_func,
table_row_template,
table_query_template,
const_query_template,
):
const_queries = []
table_rows = []
table_queries = set()

def add_case(func, haystack, needle, match):
match = int(match)
args = dict(
func = func,
haystack = haystack,
needle = needle,
match = match
)
args = dict(func=func, haystack=haystack, needle=needle, match=match)
const_queries.append(const_query_template.substitute(args))
table_queries.add(table_query_template.substitute(args))
table_rows.append(table_row_template.substitute(args))
@@ -69,14 +82,28 @@ def create_cases(case_sensitive_func, case_insensitive_func, table_row_template,
def add_case_sensitive(haystack, needle, match):
add_case(case_sensitive_func, haystack, needle, match)
if match:
add_case(case_sensitive_func, transform_needle(haystack, str.swapcase), transform_needle(needle, str.swapcase), match)
add_case(
case_sensitive_func,
transform_needle(haystack, str.swapcase),
transform_needle(needle, str.swapcase),
match,
)

def add_case_insensitive(haystack, needle, match):
add_case(case_insensitive_func, haystack, needle, match)
if match:
add_case(case_insensitive_func, transform_needle(haystack, str.swapcase), needle, match)
add_case(case_insensitive_func, haystack, transform_needle(needle, str.swapcase), match)

add_case(
case_insensitive_func,
transform_needle(haystack, str.swapcase),
needle,
match,
)
add_case(
case_insensitive_func,
haystack,
transform_needle(needle, str.swapcase),
match,
)

# Negative cases
add_case_sensitive(remove_needle(HAYSTACKS[0]), NEEDLE, False)
@@ -85,7 +112,7 @@ def create_cases(case_sensitive_func, case_insensitive_func, table_row_template,
for haystack in HAYSTACKS:
add_case_sensitive(transform_needle(haystack, str.swapcase), NEEDLE, False)

sep = ''
sep = ""
h = replace_separators(haystack, sep)

add_case_sensitive(h, NEEDLE, False)
@@ -102,8 +129,7 @@ def create_cases(case_sensitive_func, case_insensitive_func, table_row_template,
add_case_sensitive(haystack, NEEDLE, True)
add_case_insensitive(haystack, NEEDLE, True)


for sep in list(''' ,'''):
for sep in list(""" ,"""):
h = replace_separators(haystack, sep)
add_case_sensitive(h, NEEDLE, True)
add_case_sensitive(small_needle(h), small_needle(NEEDLE), True)
@@ -114,32 +140,43 @@ def create_cases(case_sensitive_func, case_insensitive_func, table_row_template,
add_case_insensitive(enlarge_haystack(h, 200, sep), NEEDLE, True)

# case insesitivity works only on ASCII strings
add_case_sensitive(replace_needle(h, 'иголка'), replace_needle(NEEDLE, 'иголка'), True)
add_case_sensitive(replace_needle(h, '指针'), replace_needle(NEEDLE, '指针'), True)
add_case_sensitive(
replace_needle(h, "иголка"), replace_needle(NEEDLE, "иголка"), True
)
add_case_sensitive(
replace_needle(h, "指针"), replace_needle(NEEDLE, "指针"), True
)

for sep in list('''~!@$%^&*()-=+|]}[{";:/?.><\t''') + [r'\\\\']:
for sep in list("""~!@$%^&*()-=+|]}[{";:/?.><\t""") + [r"\\\\"]:
h = replace_separators(HAYSTACKS[0], sep)
add_case(case_sensitive_func, h, NEEDLE, True)

return table_rows, table_queries, const_queries

def main():

def main():
def query(x):
print(x)

CONST_QUERY = Template("""SELECT ${func}('${haystack}', '${needle}'), ' expecting ', ${match};""")
TABLE_QUERY = Template("""WITH '${needle}' as n
CONST_QUERY = Template(
"""SELECT ${func}('${haystack}', '${needle}'), ' expecting ', ${match};"""
)
TABLE_QUERY = Template(
"""WITH '${needle}' as n
SELECT haystack, needle, ${func}(haystack, n) as result
FROM ht
WHERE func = '${func}' AND needle = n AND result != match;""")
WHERE func = '${func}' AND needle = n AND result != match;"""
)
TABLE_ROW = Template("""('${haystack}', '${needle}', ${match}, '${func}')""")

rows, table_queries, const_queries = create_cases('hasToken', 'hasTokenCaseInsensitive', TABLE_ROW, TABLE_QUERY, CONST_QUERY)
rows, table_queries, const_queries = create_cases(
"hasToken", "hasTokenCaseInsensitive", TABLE_ROW, TABLE_QUERY, CONST_QUERY
)
for q in const_queries:
query(q)

query("""DROP TABLE IF EXISTS ht;
query(
"""DROP TABLE IF EXISTS ht;
CREATE TABLE IF NOT EXISTS
ht
(
@@ -150,11 +187,15 @@ def main():
)
ENGINE MergeTree()
ORDER BY haystack;
INSERT INTO ht VALUES {values};""".format(values=", ".join(rows)))
INSERT INTO ht VALUES {values};""".format(
values=", ".join(rows)
)
)
for q in sorted(table_queries):
query(q)

query("""DROP TABLE ht""")

if __name__ == '__main__':

if __name__ == "__main__":
main()

@@ -8,28 +8,32 @@ import sys
import signal


CLICKHOUSE_CLIENT = os.environ.get('CLICKHOUSE_CLIENT')
CLICKHOUSE_CURL = os.environ.get('CLICKHOUSE_CURL')
CLICKHOUSE_URL = os.environ.get('CLICKHOUSE_URL')
CLICKHOUSE_CLIENT = os.environ.get("CLICKHOUSE_CLIENT")
CLICKHOUSE_CURL = os.environ.get("CLICKHOUSE_CURL")
CLICKHOUSE_URL = os.environ.get("CLICKHOUSE_URL")


def send_query(query):
cmd = list(CLICKHOUSE_CLIENT.split())
cmd += ['--query', query]
cmd += ["--query", query]
# print(cmd)
return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout
return subprocess.Popen(
cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
).stdout


def send_query_in_process_group(query):
cmd = list(CLICKHOUSE_CLIENT.split())
cmd += ['--query', query]
cmd += ["--query", query]
# print(cmd)
return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, preexec_fn=os.setsid)
return subprocess.Popen(
cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, preexec_fn=os.setsid
)


def read_lines_and_push_to_queue(pipe, queue):
try:
for line in iter(pipe.readline, ''):
for line in iter(pipe.readline, ""):
line = line.strip()
print(line)
sys.stdout.flush()
@@ -41,41 +45,44 @@ def read_lines_and_push_to_queue(pipe, queue):


def test():
send_query('DROP TABLE IF EXISTS test.lv').read()
send_query('DROP TABLE IF EXISTS test.mt').read()
send_query('CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()').read()
send_query('CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt').read()
send_query("DROP TABLE IF EXISTS test.lv").read()
send_query("DROP TABLE IF EXISTS test.mt").read()
send_query(
"CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()"
).read()
send_query("CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt").read()

q = queue.Queue()
p = send_query_in_process_group('WATCH test.lv')
p = send_query_in_process_group("WATCH test.lv")
thread = threading.Thread(target=read_lines_and_push_to_queue, args=(p.stdout, q))
thread.start()

line = q.get()
print(line)
assert (line == '0\t1')
assert line == "0\t1"

send_query('INSERT INTO test.mt VALUES (1),(2),(3)').read()
send_query("INSERT INTO test.mt VALUES (1),(2),(3)").read()
line = q.get()
print(line)
assert (line == '6\t2')
assert line == "6\t2"

send_query('INSERT INTO test.mt VALUES (4),(5),(6)').read()
send_query("INSERT INTO test.mt VALUES (4),(5),(6)").read()
line = q.get()
print(line)
assert (line == '21\t3')
assert line == "21\t3"

# Send Ctrl+C to client.
os.killpg(os.getpgid(p.pid), signal.SIGINT)
# This insert shouldn't affect lv.
send_query('INSERT INTO test.mt VALUES (7),(8),(9)').read()
send_query("INSERT INTO test.mt VALUES (7),(8),(9)").read()
line = q.get()
print(line)
assert (line is None)
assert line is None

send_query('DROP TABLE if exists test.lv').read()
send_query('DROP TABLE if exists test.lv').read()
send_query("DROP TABLE if exists test.lv").read()
send_query("DROP TABLE if exists test.lv").read()

thread.join()


test()

@@ -7,26 +7,30 @@ import os
import sys


CLICKHOUSE_CLIENT = os.environ.get('CLICKHOUSE_CLIENT')
CLICKHOUSE_CURL = os.environ.get('CLICKHOUSE_CURL')
CLICKHOUSE_URL = os.environ.get('CLICKHOUSE_URL')
CLICKHOUSE_CLIENT = os.environ.get("CLICKHOUSE_CLIENT")
CLICKHOUSE_CURL = os.environ.get("CLICKHOUSE_CURL")
CLICKHOUSE_URL = os.environ.get("CLICKHOUSE_URL")


def send_query(query):
cmd = list(CLICKHOUSE_CLIENT.split())
cmd += ['--query', query]
cmd += ["--query", query]
# print(cmd)
return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout
return subprocess.Popen(
cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
).stdout


def send_http_query(query):
cmd = list(CLICKHOUSE_CURL.split()) # list(['curl', '-sSN', '--max-time', '10'])
cmd += ['-sSN', CLICKHOUSE_URL, '-d', query]
return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout
cmd = list(CLICKHOUSE_CURL.split())  # list(['curl', '-sSN', '--max-time', '10'])
cmd += ["-sSN", CLICKHOUSE_URL, "-d", query]
return subprocess.Popen(
cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
).stdout


def read_lines_and_push_to_queue(pipe, queue):
for line in iter(pipe.readline, ''):
for line in iter(pipe.readline, ""):
line = line.strip()
print(line)
sys.stdout.flush()
@@ -36,28 +40,31 @@ def read_lines_and_push_to_queue(pipe, queue):


def test():
send_query('DROP TABLE IF EXISTS test.lv').read()
send_query('DROP TABLE IF EXISTS test.mt').read()
send_query('CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()').read()
send_query('CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt').read()
send_query("DROP TABLE IF EXISTS test.lv").read()
send_query("DROP TABLE IF EXISTS test.mt").read()
send_query(
"CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()"
).read()
send_query("CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt").read()

q = queue.Queue()
pipe = send_http_query('WATCH test.lv')
pipe = send_http_query("WATCH test.lv")
thread = threading.Thread(target=read_lines_and_push_to_queue, args=(pipe, q))
thread.start()

line = q.get()
print(line)
assert (line == '0\t1')
assert line == "0\t1"

send_query('INSERT INTO test.mt VALUES (1),(2),(3)').read()
send_query("INSERT INTO test.mt VALUES (1),(2),(3)").read()
line = q.get()
print(line)
assert (line == '6\t2')
assert line == "6\t2"

send_query('DROP TABLE if exists test.lv').read()
send_query('DROP TABLE if exists test.lv').read()
send_query("DROP TABLE if exists test.lv").read()
send_query("DROP TABLE if exists test.lv").read()

thread.join()


test()

@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/usr/bin/env python3
import os
import sys
from scipy import stats
@@ -6,70 +6,86 @@ import pandas as pd
import numpy as np

CURDIR = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(0, os.path.join(CURDIR, 'helpers'))
sys.path.insert(0, os.path.join(CURDIR, "helpers"))

from pure_http_client import ClickHouseClient


def test_and_check(name, a, b, t_stat, p_value, precision=1e-2):
client = ClickHouseClient()
client.query("DROP TABLE IF EXISTS ttest;")
client.query("CREATE TABLE ttest (left Float64, right UInt8) ENGINE = Memory;");
client.query("INSERT INTO ttest VALUES {};".format(", ".join(['({},{})'.format(i, 0) for i in a])))
client.query("INSERT INTO ttest VALUES {};".format(", ".join(['({},{})'.format(j, 1) for j in b])))
client.query("CREATE TABLE ttest (left Float64, right UInt8) ENGINE = Memory;")
client.query(
"INSERT INTO ttest VALUES {};".format(
", ".join(["({},{})".format(i, 0) for i in a])
)
)
client.query(
"INSERT INTO ttest VALUES {};".format(
", ".join(["({},{})".format(j, 1) for j in b])
)
)
real = client.query_return_df(
"SELECT roundBankers({}(left, right).1, 16) as t_stat, ".format(name) +
"roundBankers({}(left, right).2, 16) as p_value ".format(name) +
"FROM ttest FORMAT TabSeparatedWithNames;")
real_t_stat = real['t_stat'][0]
real_p_value = real['p_value'][0]
assert(abs(real_t_stat - np.float64(t_stat)) < precision), "clickhouse_t_stat {}, scipy_t_stat {}".format(real_t_stat, t_stat)
assert(abs(real_p_value - np.float64(p_value)) < precision), "clickhouse_p_value {}, scipy_p_value {}".format(real_p_value, p_value)
"SELECT roundBankers({}(left, right).1, 16) as t_stat, ".format(name)
+ "roundBankers({}(left, right).2, 16) as p_value ".format(name)
+ "FROM ttest FORMAT TabSeparatedWithNames;"
)
real_t_stat = real["t_stat"][0]
real_p_value = real["p_value"][0]
assert (
abs(real_t_stat - np.float64(t_stat)) < precision
), "clickhouse_t_stat {}, scipy_t_stat {}".format(real_t_stat, t_stat)
assert (
abs(real_p_value - np.float64(p_value)) < precision
), "clickhouse_p_value {}, scipy_p_value {}".format(real_p_value, p_value)
client.query("DROP TABLE IF EXISTS ttest;")


def test_student():
rvs1 = np.round(stats.norm.rvs(loc=1, scale=5,size=500), 2)
rvs2 = np.round(stats.norm.rvs(loc=10, scale=5,size=500), 2)
s, p = stats.ttest_ind(rvs1, rvs2, equal_var = True)
rvs1 = np.round(stats.norm.rvs(loc=1, scale=5, size=500), 2)
rvs2 = np.round(stats.norm.rvs(loc=10, scale=5, size=500), 2)
s, p = stats.ttest_ind(rvs1, rvs2, equal_var=True)
test_and_check("studentTTest", rvs1, rvs2, s, p)

rvs1 = np.round(stats.norm.rvs(loc=0, scale=5,size=500), 2)
rvs2 = np.round(stats.norm.rvs(loc=0, scale=5,size=500), 2)
s, p = stats.ttest_ind(rvs1, rvs2, equal_var = True)
rvs1 = np.round(stats.norm.rvs(loc=0, scale=5, size=500), 2)
rvs2 = np.round(stats.norm.rvs(loc=0, scale=5, size=500), 2)
s, p = stats.ttest_ind(rvs1, rvs2, equal_var=True)
test_and_check("studentTTest", rvs1, rvs2, s, p)

rvs1 = np.round(stats.norm.rvs(loc=2, scale=10,size=512), 2)
rvs2 = np.round(stats.norm.rvs(loc=5, scale=20,size=1024), 2)
s, p = stats.ttest_ind(rvs1, rvs2, equal_var = True)
rvs1 = np.round(stats.norm.rvs(loc=2, scale=10, size=512), 2)
rvs2 = np.round(stats.norm.rvs(loc=5, scale=20, size=1024), 2)
s, p = stats.ttest_ind(rvs1, rvs2, equal_var=True)
test_and_check("studentTTest", rvs1, rvs2, s, p)

rvs1 = np.round(stats.norm.rvs(loc=0, scale=10,size=1024), 2)
rvs2 = np.round(stats.norm.rvs(loc=0, scale=10,size=512), 2)
s, p = stats.ttest_ind(rvs1, rvs2, equal_var = True)
rvs1 = np.round(stats.norm.rvs(loc=0, scale=10, size=1024), 2)
rvs2 = np.round(stats.norm.rvs(loc=0, scale=10, size=512), 2)
s, p = stats.ttest_ind(rvs1, rvs2, equal_var=True)
test_and_check("studentTTest", rvs1, rvs2, s, p)


def test_welch():
rvs1 = np.round(stats.norm.rvs(loc=1, scale=15,size=500), 2)
rvs2 = np.round(stats.norm.rvs(loc=10, scale=5,size=500), 2)
s, p = stats.ttest_ind(rvs1, rvs2, equal_var = False)
rvs1 = np.round(stats.norm.rvs(loc=1, scale=15, size=500), 2)
rvs2 = np.round(stats.norm.rvs(loc=10, scale=5, size=500), 2)
s, p = stats.ttest_ind(rvs1, rvs2, equal_var=False)
test_and_check("welchTTest", rvs1, rvs2, s, p)

rvs1 = np.round(stats.norm.rvs(loc=0, scale=7,size=500), 2)
rvs2 = np.round(stats.norm.rvs(loc=0, scale=3,size=500), 2)
s, p = stats.ttest_ind(rvs1, rvs2, equal_var = False)
rvs1 = np.round(stats.norm.rvs(loc=0, scale=7, size=500), 2)
rvs2 = np.round(stats.norm.rvs(loc=0, scale=3, size=500), 2)
s, p = stats.ttest_ind(rvs1, rvs2, equal_var=False)
test_and_check("welchTTest", rvs1, rvs2, s, p)

rvs1 = np.round(stats.norm.rvs(loc=0, scale=10,size=1024), 2)
rvs2 = np.round(stats.norm.rvs(loc=5, scale=1,size=512), 2)
s, p = stats.ttest_ind(rvs1, rvs2, equal_var = False)
rvs1 = np.round(stats.norm.rvs(loc=0, scale=10, size=1024), 2)
rvs2 = np.round(stats.norm.rvs(loc=5, scale=1, size=512), 2)
s, p = stats.ttest_ind(rvs1, rvs2, equal_var=False)
test_and_check("welchTTest", rvs1, rvs2, s, p)

rvs1 = np.round(stats.norm.rvs(loc=5, scale=10,size=512), 2)
rvs2 = np.round(stats.norm.rvs(loc=5, scale=10,size=1024), 2)
s, p = stats.ttest_ind(rvs1, rvs2, equal_var = False)
rvs1 = np.round(stats.norm.rvs(loc=5, scale=10, size=512), 2)
rvs2 = np.round(stats.norm.rvs(loc=5, scale=10, size=1024), 2)
s, p = stats.ttest_ind(rvs1, rvs2, equal_var=False)
test_and_check("welchTTest", rvs1, rvs2, s, p)


if __name__ == "__main__":
test_student()
test_welch()
print("Ok.")
print("Ok.")

@@ -6,7 +6,7 @@ import pandas as pd
import numpy as np

CURDIR = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(0, os.path.join(CURDIR, 'helpers'))
sys.path.insert(0, os.path.join(CURDIR, "helpers"))

from pure_http_client import ClickHouseClient

@@ -14,40 +14,51 @@ from pure_http_client import ClickHouseClient
def test_and_check(name, a, b, t_stat, p_value):
client = ClickHouseClient()
client.query("DROP TABLE IF EXISTS mann_whitney;")
client.query("CREATE TABLE mann_whitney (left Float64, right UInt8) ENGINE = Memory;");
client.query("INSERT INTO mann_whitney VALUES {};".format(", ".join(['({},{}), ({},{})'.format(i, 0, j, 1) for i,j in zip(a, b)])))
client.query(
"CREATE TABLE mann_whitney (left Float64, right UInt8) ENGINE = Memory;"
)
client.query(
"INSERT INTO mann_whitney VALUES {};".format(
", ".join(["({},{}), ({},{})".format(i, 0, j, 1) for i, j in zip(a, b)])
)
)

real = client.query_return_df(
"SELECT roundBankers({}(left, right).1, 16) as t_stat, ".format(name) +
"roundBankers({}(left, right).2, 16) as p_value ".format(name) +
"FROM mann_whitney FORMAT TabSeparatedWithNames;")
real_t_stat = real['t_stat'][0]
real_p_value = real['p_value'][0]
assert(abs(real_t_stat - np.float64(t_stat) < 1e-2)), "clickhouse_t_stat {}, scipy_t_stat {}".format(real_t_stat, t_stat)
assert(abs(real_p_value - np.float64(p_value)) < 1e-2), "clickhouse_p_value {}, scipy_p_value {}".format(real_p_value, p_value)
"SELECT roundBankers({}(left, right).1, 16) as t_stat, ".format(name)
+ "roundBankers({}(left, right).2, 16) as p_value ".format(name)
+ "FROM mann_whitney FORMAT TabSeparatedWithNames;"
)
real_t_stat = real["t_stat"][0]
real_p_value = real["p_value"][0]
assert abs(
real_t_stat - np.float64(t_stat) < 1e-2
), "clickhouse_t_stat {}, scipy_t_stat {}".format(real_t_stat, t_stat)
assert (
abs(real_p_value - np.float64(p_value)) < 1e-2
), "clickhouse_p_value {}, scipy_p_value {}".format(real_p_value, p_value)
client.query("DROP TABLE IF EXISTS mann_whitney;")


def test_mann_whitney():
rvs1 = np.round(stats.norm.rvs(loc=1, scale=5,size=500), 5)
rvs2 = np.round(stats.expon.rvs(scale=0.2,size=500), 5)
s, p = stats.mannwhitneyu(rvs1, rvs2, alternative='two-sided')
rvs1 = np.round(stats.norm.rvs(loc=1, scale=5, size=500), 5)
rvs2 = np.round(stats.expon.rvs(scale=0.2, size=500), 5)
s, p = stats.mannwhitneyu(rvs1, rvs2, alternative="two-sided")
test_and_check("mannWhitneyUTest", rvs1, rvs2, s, p)
test_and_check("mannWhitneyUTest('two-sided')", rvs1, rvs2, s, p)

equal = np.round(stats.cauchy.rvs(scale=5, size=500), 5)
s, p = stats.mannwhitneyu(equal, equal, alternative='two-sided')
s, p = stats.mannwhitneyu(equal, equal, alternative="two-sided")
test_and_check("mannWhitneyUTest('two-sided')", equal, equal, s, p)

s, p = stats.mannwhitneyu(equal, equal, alternative='less', use_continuity=False)
s, p = stats.mannwhitneyu(equal, equal, alternative="less", use_continuity=False)
test_and_check("mannWhitneyUTest('less', 0)", equal, equal, s, p)


rvs1 = np.round(stats.cauchy.rvs(scale=10,size=65536), 5)
rvs2 = np.round(stats.norm.rvs(loc=0, scale=10,size=65536), 5)
s, p = stats.mannwhitneyu(rvs1, rvs2, alternative='greater')
rvs1 = np.round(stats.cauchy.rvs(scale=10, size=65536), 5)
rvs2 = np.round(stats.norm.rvs(loc=0, scale=10, size=65536), 5)
s, p = stats.mannwhitneyu(rvs1, rvs2, alternative="greater")
test_and_check("mannWhitneyUTest('greater')", rvs1, rvs2, s, p)


if __name__ == "__main__":
test_mann_whitney()
print("Ok.")
print("Ok.")

@ -4,14 +4,18 @@ from random import randint, choices
import sys

CURDIR = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(0, os.path.join(CURDIR, 'helpers'))
sys.path.insert(0, os.path.join(CURDIR, "helpers"))

from pure_http_client import ClickHouseClient

client = ClickHouseClient()

N = 10
create_query = "CREATE TABLE t_cnf_fuzz(" + ", ".join([f"c{i} UInt8" for i in range(N)]) + ") ENGINE = Memory"
create_query = (
"CREATE TABLE t_cnf_fuzz("
+ ", ".join([f"c{i} UInt8" for i in range(N)])
+ ") ENGINE = Memory"
)

client.query("DROP TABLE IF EXISTS t_cnf_fuzz")
client.query(create_query)
@ -35,6 +39,7 @@ client.query(insert_query)
MAX_CLAUSES = 10
MAX_ATOMS = 5


def generate_dnf():
clauses = []
num_clauses = randint(1, MAX_CLAUSES)
@ -42,12 +47,17 @@ def generate_dnf():
num_atoms = randint(1, MAX_ATOMS)
atom_ids = choices(range(N), k=num_atoms)
negates = choices([0, 1], k=num_atoms)
atoms = [f"(NOT c{i})" if neg else f"c{i}" for (i, neg) in zip(atom_ids, negates)]
atoms = [
f"(NOT c{i})" if neg else f"c{i}" for (i, neg) in zip(atom_ids, negates)
]
clauses.append("(" + " AND ".join(atoms) + ")")

return " OR ".join(clauses)

select_query = "SELECT count() FROM t_cnf_fuzz WHERE {} SETTINGS convert_query_to_cnf = {}"

select_query = (
"SELECT count() FROM t_cnf_fuzz WHERE {} SETTINGS convert_query_to_cnf = {}"
)

fail_report = """
Failed query: '{}'.

@ -5,15 +5,20 @@ import random
import string

CURDIR = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(0, os.path.join(CURDIR, 'helpers'))
sys.path.insert(0, os.path.join(CURDIR, "helpers"))

from pure_http_client import ClickHouseClient


def get_random_string(length):
return ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(length))
return "".join(
random.choice(string.ascii_uppercase + string.digits) for _ in range(length)
)


client = ClickHouseClient()


def insert_block(table_name, block_granularity_rows, block_rows):
global client
block_data = []
@ -25,9 +30,12 @@ def insert_block(table_name, block_granularity_rows, block_rows):
values_row = ", ".join("(1, '" + row + "')" for row in block_data)
client.query("INSERT INTO {} VALUES {}".format(table_name, values_row))


try:
client.query("DROP TABLE IF EXISTS t")
client.query("CREATE TABLE t (v UInt8, data String) ENGINE = MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0")
client.query(
"CREATE TABLE t (v UInt8, data String) ENGINE = MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0"
)

client.query("SYSTEM STOP MERGES t")

@ -53,6 +61,10 @@ try:
client.query("SYSTEM START MERGES t")
client.query("OPTIMIZE TABLE t FINAL")

print(client.query_return_df("SELECT COUNT() as C FROM t FORMAT TabSeparatedWithNames")['C'][0])
print(
client.query_return_df(
"SELECT COUNT() as C FROM t FORMAT TabSeparatedWithNames"
)["C"][0]
)
finally:
client.query("DROP TABLE IF EXISTS t")

@ -1,6 +1,6 @@
#!/usr/bin/env python3

from http.server import SimpleHTTPRequestHandler,HTTPServer
from http.server import SimpleHTTPRequestHandler, HTTPServer
import socket
import csv
import sys
@ -21,6 +21,7 @@ def is_ipv6(host):
except:
return True


def get_local_port(host, ipv6):
if ipv6:
family = socket.AF_INET6
@ -31,8 +32,9 @@ def get_local_port(host, ipv6):
fd.bind((host, 0))
return fd.getsockname()[1]

CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', 'localhost')
CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123')

CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "localhost")
CLICKHOUSE_PORT_HTTP = os.environ.get("CLICKHOUSE_PORT_HTTP", "8123")

#####################################################################################
# This test starts an HTTP server and serves data to clickhouse url-engine based table.
@ -42,16 +44,24 @@ CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123')
#####################################################################################

# IP-address of this host accessible from the outside world. Get the first one
HTTP_SERVER_HOST = subprocess.check_output(['hostname', '-i']).decode('utf-8').strip().split()[0]
HTTP_SERVER_HOST = (
subprocess.check_output(["hostname", "-i"]).decode("utf-8").strip().split()[0]
)
IS_IPV6 = is_ipv6(HTTP_SERVER_HOST)
HTTP_SERVER_PORT = get_local_port(HTTP_SERVER_HOST, IS_IPV6)

# IP address and port of the HTTP server started from this script.
HTTP_SERVER_ADDRESS = (HTTP_SERVER_HOST, HTTP_SERVER_PORT)
if IS_IPV6:
HTTP_SERVER_URL_STR = 'http://' + f'[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}' + "/"
HTTP_SERVER_URL_STR = (
"http://"
+ f"[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}"
+ "/"
)
else:
HTTP_SERVER_URL_STR = 'http://' + f'{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}' + "/"
HTTP_SERVER_URL_STR = (
"http://" + f"{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}" + "/"
)

# Because we need to check the content of file.csv we can create this content and avoid reading csv
CSV_DATA = "Hello, 1\nWorld, 2\nThis, 152\nis, 9283\ntesting, 2313213\ndata, 555\n"
@ -59,19 +69,24 @@ CSV_DATA = "Hello, 1\nWorld, 2\nThis, 152\nis, 9283\ntesting, 2313213\ndata, 555
|
||||
|
||||
# Choose compression method
|
||||
# (Will change during test, need to check standard data sending, to make sure that nothing broke)
|
||||
COMPRESS_METHOD = 'none'
|
||||
ADDING_ENDING = ''
|
||||
ENDINGS = ['.gz', '.xz']
|
||||
COMPRESS_METHOD = "none"
|
||||
ADDING_ENDING = ""
|
||||
ENDINGS = [".gz", ".xz"]
|
||||
SEND_ENCODING = True
|
||||
|
||||
|
||||
def get_ch_answer(query):
|
||||
host = CLICKHOUSE_HOST
|
||||
if IS_IPV6:
|
||||
host = f'[{host}]'
|
||||
host = f"[{host}]"
|
||||
|
||||
url = os.environ.get('CLICKHOUSE_URL', 'http://{host}:{port}'.format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP))
|
||||
url = os.environ.get(
|
||||
"CLICKHOUSE_URL",
|
||||
"http://{host}:{port}".format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP),
|
||||
)
|
||||
return urllib.request.urlopen(url, data=query.encode()).read().decode()
|
||||
|
||||
|
||||
def check_answers(query, answer):
|
||||
ch_answer = get_ch_answer(query)
|
||||
if ch_answer.strip() != answer.strip():
|
||||
@ -80,18 +95,19 @@ def check_answers(query, answer):
|
||||
print("Fetched answer :", ch_answer, file=sys.stderr)
|
||||
raise Exception("Fail on query")
|
||||
|
||||
|
||||
# Server with head method which is useful for debuging by hands
|
||||
class HttpProcessor(SimpleHTTPRequestHandler):
|
||||
def _set_headers(self):
|
||||
self.send_response(200)
|
||||
if SEND_ENCODING:
|
||||
self.send_header('Content-Encoding', COMPRESS_METHOD)
|
||||
if COMPRESS_METHOD == 'none':
|
||||
self.send_header('Content-Length', len(CSV_DATA.encode()))
|
||||
self.send_header("Content-Encoding", COMPRESS_METHOD)
|
||||
if COMPRESS_METHOD == "none":
|
||||
self.send_header("Content-Length", len(CSV_DATA.encode()))
|
||||
else:
|
||||
self.compress_data()
|
||||
self.send_header('Content-Length', len(self.data))
|
||||
self.send_header('Content-Type', 'text/csv')
|
||||
self.send_header("Content-Length", len(self.data))
|
||||
self.send_header("Content-Type", "text/csv")
|
||||
self.end_headers()
|
||||
|
||||
def do_HEAD(self):
|
||||
@ -99,18 +115,17 @@ class HttpProcessor(SimpleHTTPRequestHandler):
|
||||
return
|
||||
|
||||
def compress_data(self):
|
||||
if COMPRESS_METHOD == 'gzip':
|
||||
if COMPRESS_METHOD == "gzip":
|
||||
self.data = gzip.compress((CSV_DATA).encode())
|
||||
elif COMPRESS_METHOD == 'lzma':
|
||||
elif COMPRESS_METHOD == "lzma":
|
||||
self.data = lzma.compress((CSV_DATA).encode())
|
||||
else:
|
||||
self.data = 'WRONG CONVERSATION'.encode()
|
||||
|
||||
self.data = "WRONG CONVERSATION".encode()
|
||||
|
||||
def do_GET(self):
|
||||
self._set_headers()
|
||||
|
||||
if COMPRESS_METHOD == 'none':
|
||||
if COMPRESS_METHOD == "none":
|
||||
self.wfile.write(CSV_DATA.encode())
|
||||
else:
|
||||
self.wfile.write(self.data)
|
||||
@ -119,9 +134,11 @@ class HttpProcessor(SimpleHTTPRequestHandler):
|
||||
def log_message(self, format, *args):
|
||||
return
|
||||
|
||||
|
||||
class HTTPServerV6(HTTPServer):
|
||||
address_family = socket.AF_INET6
|
||||
|
||||
|
||||
def start_server(requests_amount):
|
||||
if IS_IPV6:
|
||||
httpd = HTTPServerV6(HTTP_SERVER_ADDRESS, HttpProcessor)
|
||||
@ -135,52 +152,60 @@ def start_server(requests_amount):
|
||||
t = threading.Thread(target=real_func)
|
||||
return t
|
||||
|
||||
|
||||
#####################################################################
|
||||
# Testing area.
|
||||
#####################################################################
|
||||
|
||||
def test_select(dict_name="", schema="word String, counter UInt32", requests=[], answers=[], test_data=""):
|
||||
|
||||
def test_select(
|
||||
dict_name="",
|
||||
schema="word String, counter UInt32",
|
||||
requests=[],
|
||||
answers=[],
|
||||
test_data="",
|
||||
):
|
||||
global ADDING_ENDING
|
||||
global SEND_ENCODING
|
||||
global COMPRESS_METHOD
|
||||
for i in range(len(requests)):
|
||||
if i > 2:
|
||||
ADDING_ENDING = ENDINGS[i-3]
|
||||
ADDING_ENDING = ENDINGS[i - 3]
|
||||
SEND_ENCODING = False
|
||||
|
||||
if dict_name:
|
||||
get_ch_answer("drop dictionary if exists {}".format(dict_name))
|
||||
get_ch_answer('''CREATE DICTIONARY {} ({})
|
||||
get_ch_answer(
|
||||
"""CREATE DICTIONARY {} ({})
|
||||
PRIMARY KEY word
|
||||
SOURCE(HTTP(url '{}' format 'CSV'))
|
||||
LAYOUT(complex_key_hashed())
|
||||
LIFETIME(0)'''.format(dict_name, schema, HTTP_SERVER_URL_STR + '/test.csv' + ADDING_ENDING))
|
||||
LIFETIME(0)""".format(
|
||||
dict_name, schema, HTTP_SERVER_URL_STR + "/test.csv" + ADDING_ENDING
|
||||
)
|
||||
)
|
||||
|
||||
COMPRESS_METHOD = requests[i]
|
||||
print(i, COMPRESS_METHOD, ADDING_ENDING, SEND_ENCODING)
|
||||
check_answers("SELECT * FROM {} ORDER BY word".format(dict_name), answers[i])
|
||||
|
||||
|
||||
def main():
|
||||
# first three for encoding, second three for url
|
||||
insert_requests = [
|
||||
'none',
|
||||
'gzip',
|
||||
'lzma',
|
||||
'gzip',
|
||||
'lzma'
|
||||
]
|
||||
insert_requests = ["none", "gzip", "lzma", "gzip", "lzma"]
|
||||
|
||||
# This answers got experemently in non compressed mode and they are correct
|
||||
answers = ['''Hello 1\nThis 152\nWorld 2\ndata 555\nis 9283\ntesting 2313213'''] * 5
|
||||
answers = ["""Hello 1\nThis 152\nWorld 2\ndata 555\nis 9283\ntesting 2313213"""] * 5
|
||||
|
||||
t = start_server(len(insert_requests))
|
||||
t.start()
|
||||
test_select(dict_name="test_table_select", requests=insert_requests, answers=answers)
|
||||
test_select(
|
||||
dict_name="test_table_select", requests=insert_requests, answers=answers
|
||||
)
|
||||
t.join()
|
||||
print("PASSED")
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
try:
|
||||
main()
|
||||
@ -191,5 +216,3 @@ if __name__ == "__main__":
|
||||
sys.stderr.flush()
|
||||
|
||||
os._exit(1)
|
||||
|
||||
|
||||
|
@ -5,9 +5,10 @@ import socket
|
||||
import os
|
||||
import uuid
|
||||
|
||||
CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', '127.0.0.1')
|
||||
CLICKHOUSE_PORT = int(os.environ.get('CLICKHOUSE_PORT_TCP', '900000'))
|
||||
CLICKHOUSE_DATABASE = os.environ.get('CLICKHOUSE_DATABASE', 'default')
|
||||
CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "127.0.0.1")
|
||||
CLICKHOUSE_PORT = int(os.environ.get("CLICKHOUSE_PORT_TCP", "900000"))
|
||||
CLICKHOUSE_DATABASE = os.environ.get("CLICKHOUSE_DATABASE", "default")
|
||||
|
||||
|
||||
def writeVarUInt(x, ba):
|
||||
for _ in range(0, 9):
|
||||
@ -24,12 +25,12 @@ def writeVarUInt(x, ba):
|
||||
|
||||
|
||||
def writeStringBinary(s, ba):
|
||||
b = bytes(s, 'utf-8')
|
||||
b = bytes(s, "utf-8")
|
||||
writeVarUInt(len(s), ba)
|
||||
ba.extend(b)
|
||||
|
||||
|
||||
def readStrict(s, size = 1):
|
||||
def readStrict(s, size=1):
|
||||
res = bytearray()
|
||||
while size:
|
||||
cur = s.recv(size)
|
||||
@ -48,18 +49,23 @@ def readUInt(s, size=1):
|
||||
val += res[i] << (i * 8)
|
||||
return val
|
||||
|
||||
|
||||
def readUInt8(s):
|
||||
return readUInt(s)
|
||||
|
||||
|
||||
def readUInt16(s):
|
||||
return readUInt(s, 2)
|
||||
|
||||
|
||||
def readUInt32(s):
|
||||
return readUInt(s, 4)
|
||||
|
||||
|
||||
def readUInt64(s):
|
||||
return readUInt(s, 8)
|
||||
|
||||
|
||||
def readVarUInt(s):
|
||||
x = 0
|
||||
for i in range(9):
|
||||
@ -75,25 +81,25 @@ def readVarUInt(s):
|
||||
def readStringBinary(s):
|
||||
size = readVarUInt(s)
|
||||
s = readStrict(s, size)
|
||||
return s.decode('utf-8')
|
||||
return s.decode("utf-8")
|
||||
|
||||
|
||||
def sendHello(s):
|
||||
ba = bytearray()
|
||||
writeVarUInt(0, ba) # Hello
|
||||
writeStringBinary('simple native protocol', ba)
|
||||
writeVarUInt(0, ba) # Hello
|
||||
writeStringBinary("simple native protocol", ba)
|
||||
writeVarUInt(21, ba)
|
||||
writeVarUInt(9, ba)
|
||||
writeVarUInt(54449, ba)
|
||||
writeStringBinary('default', ba) # database
|
||||
writeStringBinary('default', ba) # user
|
||||
writeStringBinary('', ba) # pwd
|
||||
writeStringBinary("default", ba) # database
|
||||
writeStringBinary("default", ba) # user
|
||||
writeStringBinary("", ba) # pwd
|
||||
s.sendall(ba)
|
||||
|
||||
|
||||
def receiveHello(s):
|
||||
p_type = readVarUInt(s)
|
||||
assert (p_type == 0) # Hello
|
||||
assert p_type == 0 # Hello
|
||||
server_name = readStringBinary(s)
|
||||
# print("Server name: ", server_name)
|
||||
server_version_major = readVarUInt(s)
|
||||
@ -111,78 +117,79 @@ def receiveHello(s):
|
||||
|
||||
|
||||
def serializeClientInfo(ba, query_id):
|
||||
writeStringBinary('default', ba) # initial_user
|
||||
writeStringBinary(query_id, ba) # initial_query_id
|
||||
writeStringBinary('127.0.0.1:9000', ba) # initial_address
|
||||
ba.extend([0] * 8) # initial_query_start_time_microseconds
|
||||
ba.append(1) # TCP
|
||||
writeStringBinary('os_user', ba) # os_user
|
||||
writeStringBinary('client_hostname', ba) # client_hostname
|
||||
writeStringBinary('client_name', ba) # client_name
|
||||
writeStringBinary("default", ba) # initial_user
|
||||
writeStringBinary(query_id, ba) # initial_query_id
|
||||
writeStringBinary("127.0.0.1:9000", ba) # initial_address
|
||||
ba.extend([0] * 8) # initial_query_start_time_microseconds
|
||||
ba.append(1) # TCP
|
||||
writeStringBinary("os_user", ba) # os_user
|
||||
writeStringBinary("client_hostname", ba) # client_hostname
|
||||
writeStringBinary("client_name", ba) # client_name
|
||||
writeVarUInt(21, ba)
|
||||
writeVarUInt(9, ba)
|
||||
writeVarUInt(54449, ba)
|
||||
writeStringBinary('', ba) # quota_key
|
||||
writeVarUInt(0, ba) # distributed_depth
|
||||
writeVarUInt(1, ba) # client_version_patch
|
||||
ba.append(0) # No telemetry
|
||||
writeStringBinary("", ba) # quota_key
|
||||
writeVarUInt(0, ba) # distributed_depth
|
||||
writeVarUInt(1, ba) # client_version_patch
|
||||
ba.append(0) # No telemetry
|
||||
|
||||
|
||||
def sendQuery(s, query):
|
||||
ba = bytearray()
|
||||
query_id = uuid.uuid4().hex
|
||||
writeVarUInt(1, ba) # query
|
||||
writeVarUInt(1, ba) # query
|
||||
writeStringBinary(query_id, ba)
|
||||
|
||||
ba.append(1) # INITIAL_QUERY
|
||||
ba.append(1) # INITIAL_QUERY
|
||||
|
||||
# client info
|
||||
serializeClientInfo(ba, query_id)
|
||||
|
||||
writeStringBinary('', ba) # No settings
|
||||
writeStringBinary('', ba) # No interserver secret
|
||||
writeVarUInt(2, ba) # Stage - Complete
|
||||
ba.append(0) # No compression
|
||||
writeStringBinary(query, ba) # query, finally
|
||||
writeStringBinary("", ba) # No settings
|
||||
writeStringBinary("", ba) # No interserver secret
|
||||
writeVarUInt(2, ba) # Stage - Complete
|
||||
ba.append(0) # No compression
|
||||
writeStringBinary(query, ba) # query, finally
|
||||
s.sendall(ba)
|
||||
|
||||
|
||||
def serializeBlockInfo(ba):
|
||||
writeVarUInt(1, ba) # 1
|
||||
ba.append(0) # is_overflows
|
||||
writeVarUInt(2, ba) # 2
|
||||
writeVarUInt(0, ba) # 0
|
||||
ba.extend([0] * 4) # bucket_num
|
||||
writeVarUInt(1, ba) # 1
|
||||
ba.append(0) # is_overflows
|
||||
writeVarUInt(2, ba) # 2
|
||||
writeVarUInt(0, ba) # 0
|
||||
ba.extend([0] * 4) # bucket_num
|
||||
|
||||
|
||||
def sendEmptyBlock(s):
|
||||
ba = bytearray()
|
||||
writeVarUInt(2, ba) # Data
|
||||
writeStringBinary('', ba)
|
||||
writeVarUInt(2, ba) # Data
|
||||
writeStringBinary("", ba)
|
||||
serializeBlockInfo(ba)
|
||||
writeVarUInt(0, ba) # rows
|
||||
writeVarUInt(0, ba) # columns
|
||||
writeVarUInt(0, ba) # rows
|
||||
writeVarUInt(0, ba) # columns
|
||||
s.sendall(ba)
|
||||
|
||||
|
||||
def assertPacket(packet, expected):
|
||||
assert(packet == expected), packet
|
||||
assert packet == expected, packet
|
||||
|
||||
|
||||
def readHeader(s):
|
||||
packet_type = readVarUInt(s)
|
||||
if packet_type == 2: # Exception
|
||||
if packet_type == 2: # Exception
|
||||
raise RuntimeError(readException(s))
|
||||
assertPacket(packet_type, 1) # Data
|
||||
assertPacket(packet_type, 1) # Data
|
||||
|
||||
readStringBinary(s) # external table name
|
||||
readStringBinary(s) # external table name
|
||||
# BlockInfo
|
||||
assertPacket(readVarUInt(s), 1) # 1
|
||||
assertPacket(readUInt8(s), 0) # is_overflows
|
||||
assertPacket(readVarUInt(s), 2) # 2
|
||||
assertPacket(readUInt32(s), 4294967295) # bucket_num
|
||||
assertPacket(readVarUInt(s), 0) # 0
|
||||
columns = readVarUInt(s) # rows
|
||||
rows = readVarUInt(s) # columns
|
||||
assertPacket(readVarUInt(s), 1) # 1
|
||||
assertPacket(readUInt8(s), 0) # is_overflows
|
||||
assertPacket(readVarUInt(s), 2) # 2
|
||||
assertPacket(readUInt32(s), 4294967295) # bucket_num
|
||||
assertPacket(readVarUInt(s), 0) # 0
|
||||
columns = readVarUInt(s) # rows
|
||||
rows = readVarUInt(s) # columns
|
||||
print("Rows {} Columns {}".format(rows, columns))
|
||||
for _ in range(columns):
|
||||
col_name = readStringBinary(s)
|
||||
@ -194,9 +201,9 @@ def readException(s):
|
||||
code = readUInt32(s)
|
||||
name = readStringBinary(s)
|
||||
text = readStringBinary(s)
|
||||
readStringBinary(s) # trace
|
||||
assertPacket(readUInt8(s), 0) # has_nested
|
||||
return "code {}: {}".format(code, text.replace('DB::Exception:', ''))
|
||||
readStringBinary(s) # trace
|
||||
assertPacket(readUInt8(s), 0) # has_nested
|
||||
return "code {}: {}".format(code, text.replace("DB::Exception:", ""))
|
||||
|
||||
|
||||
def insertValidLowCardinalityRow():
|
||||
@ -205,7 +212,12 @@ def insertValidLowCardinalityRow():
|
||||
s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT))
|
||||
sendHello(s)
|
||||
receiveHello(s)
|
||||
sendQuery(s, 'insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV'.format(CLICKHOUSE_DATABASE))
|
||||
sendQuery(
|
||||
s,
|
||||
"insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV".format(
|
||||
CLICKHOUSE_DATABASE
|
||||
),
|
||||
)
|
||||
|
||||
# external tables
|
||||
sendEmptyBlock(s)
|
||||
@ -213,25 +225,27 @@ def insertValidLowCardinalityRow():
|
||||
|
||||
# Data
|
||||
ba = bytearray()
|
||||
writeVarUInt(2, ba) # Data
|
||||
writeStringBinary('', ba)
|
||||
writeVarUInt(2, ba) # Data
|
||||
writeStringBinary("", ba)
|
||||
serializeBlockInfo(ba)
|
||||
writeVarUInt(1, ba) # rows
|
||||
writeVarUInt(1, ba) # columns
|
||||
writeStringBinary('x', ba)
|
||||
writeStringBinary('LowCardinality(String)', ba)
|
||||
ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys
|
||||
ba.extend([3, 2] + [0] * 6) # indexes type: UInt64 [3], with additional keys [2]
|
||||
ba.extend([1] + [0] * 7) # num_keys in dict
|
||||
writeStringBinary('hello', ba) # key
|
||||
ba.extend([1] + [0] * 7) # num_indexes
|
||||
ba.extend([0] * 8) # UInt64 index (0 for 'hello')
|
||||
writeVarUInt(1, ba) # rows
|
||||
writeVarUInt(1, ba) # columns
|
||||
writeStringBinary("x", ba)
|
||||
writeStringBinary("LowCardinality(String)", ba)
|
||||
ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys
|
||||
ba.extend(
|
||||
[3, 2] + [0] * 6
|
||||
) # indexes type: UInt64 [3], with additional keys [2]
|
||||
ba.extend([1] + [0] * 7) # num_keys in dict
|
||||
writeStringBinary("hello", ba) # key
|
||||
ba.extend([1] + [0] * 7) # num_indexes
|
||||
ba.extend([0] * 8) # UInt64 index (0 for 'hello')
|
||||
s.sendall(ba)
|
||||
|
||||
# Fin block
|
||||
sendEmptyBlock(s)
|
||||
|
||||
assertPacket(readVarUInt(s), 5) # End of stream
|
||||
assertPacket(readVarUInt(s), 5) # End of stream
|
||||
s.close()
|
||||
|
||||
|
||||
@ -241,7 +255,12 @@ def insertLowCardinalityRowWithIndexOverflow():
|
||||
s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT))
|
||||
sendHello(s)
|
||||
receiveHello(s)
|
||||
sendQuery(s, 'insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV'.format(CLICKHOUSE_DATABASE))
|
||||
sendQuery(
|
||||
s,
|
||||
"insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV".format(
|
||||
CLICKHOUSE_DATABASE
|
||||
),
|
||||
)
|
||||
|
||||
# external tables
|
||||
sendEmptyBlock(s)
|
||||
@ -249,19 +268,21 @@ def insertLowCardinalityRowWithIndexOverflow():
|
||||
|
||||
# Data
|
||||
ba = bytearray()
|
||||
writeVarUInt(2, ba) # Data
|
||||
writeStringBinary('', ba)
|
||||
writeVarUInt(2, ba) # Data
|
||||
writeStringBinary("", ba)
|
||||
serializeBlockInfo(ba)
|
||||
writeVarUInt(1, ba) # rows
|
||||
writeVarUInt(1, ba) # columns
|
||||
writeStringBinary('x', ba)
|
||||
writeStringBinary('LowCardinality(String)', ba)
|
||||
ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys
|
||||
ba.extend([3, 2] + [0] * 6) # indexes type: UInt64 [3], with additional keys [2]
|
||||
ba.extend([1] + [0] * 7) # num_keys in dict
|
||||
writeStringBinary('hello', ba) # key
|
||||
ba.extend([1] + [0] * 7) # num_indexes
|
||||
ba.extend([0] * 7 + [1]) # UInt64 index (overflow)
|
||||
writeVarUInt(1, ba) # rows
|
||||
writeVarUInt(1, ba) # columns
|
||||
writeStringBinary("x", ba)
|
||||
writeStringBinary("LowCardinality(String)", ba)
|
||||
ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys
|
||||
ba.extend(
|
||||
[3, 2] + [0] * 6
|
||||
) # indexes type: UInt64 [3], with additional keys [2]
|
||||
ba.extend([1] + [0] * 7) # num_keys in dict
|
||||
writeStringBinary("hello", ba) # key
|
||||
ba.extend([1] + [0] * 7) # num_indexes
|
||||
ba.extend([0] * 7 + [1]) # UInt64 index (overflow)
|
||||
s.sendall(ba)
|
||||
|
||||
assertPacket(readVarUInt(s), 2)
|
||||
@ -275,7 +296,12 @@ def insertLowCardinalityRowWithIncorrectDictType():
|
||||
s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT))
|
||||
sendHello(s)
|
||||
receiveHello(s)
|
||||
sendQuery(s, 'insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV'.format(CLICKHOUSE_DATABASE))
|
||||
sendQuery(
|
||||
s,
|
||||
"insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV".format(
|
||||
CLICKHOUSE_DATABASE
|
||||
),
|
||||
)
|
||||
|
||||
# external tables
|
||||
sendEmptyBlock(s)
|
||||
@ -283,32 +309,40 @@ def insertLowCardinalityRowWithIncorrectDictType():
|
||||
|
||||
# Data
|
||||
ba = bytearray()
|
||||
writeVarUInt(2, ba) # Data
|
||||
writeStringBinary('', ba)
|
||||
writeVarUInt(2, ba) # Data
|
||||
writeStringBinary("", ba)
|
||||
serializeBlockInfo(ba)
|
||||
writeVarUInt(1, ba) # rows
|
||||
writeVarUInt(1, ba) # columns
|
||||
writeStringBinary('x', ba)
|
||||
writeStringBinary('LowCardinality(String)', ba)
|
||||
ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys
|
||||
ba.extend([3, 3] + [0] * 6) # indexes type: UInt64 [3], with global dict and add keys [1 + 2]
|
||||
ba.extend([1] + [0] * 7) # num_keys in dict
|
||||
writeStringBinary('hello', ba) # key
|
||||
ba.extend([1] + [0] * 7) # num_indexes
|
||||
ba.extend([0] * 8) # UInt64 index (overflow)
|
||||
writeVarUInt(1, ba) # rows
|
||||
writeVarUInt(1, ba) # columns
|
||||
writeStringBinary("x", ba)
|
||||
writeStringBinary("LowCardinality(String)", ba)
|
||||
ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys
|
||||
ba.extend(
|
||||
[3, 3] + [0] * 6
|
||||
) # indexes type: UInt64 [3], with global dict and add keys [1 + 2]
|
||||
ba.extend([1] + [0] * 7) # num_keys in dict
|
||||
writeStringBinary("hello", ba) # key
|
||||
ba.extend([1] + [0] * 7) # num_indexes
|
||||
ba.extend([0] * 8) # UInt64 index (overflow)
|
||||
s.sendall(ba)
|
||||
|
||||
assertPacket(readVarUInt(s), 2)
|
||||
print(readException(s))
|
||||
s.close()
|
||||
|
||||
|
||||
def insertLowCardinalityRowWithIncorrectAdditionalKeys():
|
||||
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
||||
s.settimeout(30)
|
||||
s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT))
|
||||
sendHello(s)
|
||||
receiveHello(s)
|
||||
sendQuery(s, 'insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV'.format(CLICKHOUSE_DATABASE))
|
||||
sendQuery(
|
||||
s,
|
||||
"insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV".format(
|
||||
CLICKHOUSE_DATABASE
|
||||
),
|
||||
)
|
||||
|
||||
# external tables
|
||||
sendEmptyBlock(s)
|
||||
@ -316,30 +350,34 @@ def insertLowCardinalityRowWithIncorrectAdditionalKeys():
|
||||
|
||||
# Data
|
||||
ba = bytearray()
|
||||
writeVarUInt(2, ba) # Data
|
||||
writeStringBinary('', ba)
|
||||
writeVarUInt(2, ba) # Data
|
||||
writeStringBinary("", ba)
|
||||
serializeBlockInfo(ba)
|
||||
writeVarUInt(1, ba) # rows
|
||||
writeVarUInt(1, ba) # columns
|
||||
writeStringBinary('x', ba)
|
||||
writeStringBinary('LowCardinality(String)', ba)
|
||||
ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys
|
||||
ba.extend([3, 0] + [0] * 6) # indexes type: UInt64 [3], with NO additional keys [0]
|
||||
ba.extend([1] + [0] * 7) # num_keys in dict
|
||||
writeStringBinary('hello', ba) # key
|
||||
ba.extend([1] + [0] * 7) # num_indexes
|
||||
ba.extend([0] * 8) # UInt64 index (0 for 'hello')
|
||||
writeVarUInt(1, ba) # rows
|
||||
writeVarUInt(1, ba) # columns
|
||||
writeStringBinary("x", ba)
|
||||
writeStringBinary("LowCardinality(String)", ba)
|
||||
ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys
|
||||
ba.extend(
|
||||
[3, 0] + [0] * 6
|
||||
) # indexes type: UInt64 [3], with NO additional keys [0]
|
||||
ba.extend([1] + [0] * 7) # num_keys in dict
|
||||
writeStringBinary("hello", ba) # key
|
||||
ba.extend([1] + [0] * 7) # num_indexes
|
||||
ba.extend([0] * 8) # UInt64 index (0 for 'hello')
|
||||
s.sendall(ba)
|
||||
|
||||
assertPacket(readVarUInt(s), 2)
|
||||
print(readException(s))
|
||||
s.close()
|
||||
|
||||
|
||||
def main():
|
||||
insertValidLowCardinalityRow()
|
||||
insertLowCardinalityRowWithIndexOverflow()
|
||||
insertLowCardinalityRowWithIncorrectDictType()
|
||||
insertLowCardinalityRowWithIncorrectAdditionalKeys()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
@ -12,6 +12,7 @@ import subprocess
|
||||
from io import StringIO
|
||||
from http.server import BaseHTTPRequestHandler, HTTPServer
|
||||
|
||||
|
||||
def is_ipv6(host):
|
||||
try:
|
||||
socket.inet_aton(host)
|
||||
@ -19,6 +20,7 @@ def is_ipv6(host):
|
||||
except:
|
||||
return True
|
||||
|
||||
|
||||
def get_local_port(host, ipv6):
|
||||
if ipv6:
|
||||
family = socket.AF_INET6
|
||||
@ -29,8 +31,9 @@ def get_local_port(host, ipv6):
|
||||
fd.bind((host, 0))
|
||||
return fd.getsockname()[1]
|
||||
|
||||
CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', '127.0.0.1')
|
||||
CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123')
|
||||
|
||||
CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "127.0.0.1")
|
||||
CLICKHOUSE_PORT_HTTP = os.environ.get("CLICKHOUSE_PORT_HTTP", "8123")
|
||||
|
||||
#####################################################################################
|
||||
# This test starts an HTTP server and serves data to clickhouse url-engine based table.
|
||||
@ -39,27 +42,42 @@ CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123')
|
||||
#####################################################################################
|
||||
|
||||
# IP-address of this host accessible from the outside world. Get the first one
|
||||
HTTP_SERVER_HOST = subprocess.check_output(['hostname', '-i']).decode('utf-8').strip().split()[0]
|
||||
HTTP_SERVER_HOST = (
|
||||
subprocess.check_output(["hostname", "-i"]).decode("utf-8").strip().split()[0]
|
||||
)
|
||||
IS_IPV6 = is_ipv6(HTTP_SERVER_HOST)
|
||||
HTTP_SERVER_PORT = get_local_port(HTTP_SERVER_HOST, IS_IPV6)
|
||||
|
||||
# IP address and port of the HTTP server started from this script.
|
||||
HTTP_SERVER_ADDRESS = (HTTP_SERVER_HOST, HTTP_SERVER_PORT)
|
||||
if IS_IPV6:
|
||||
HTTP_SERVER_URL_STR = 'http://' + f'[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}' + "/"
|
||||
HTTP_SERVER_URL_STR = (
|
||||
"http://"
|
||||
+ f"[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}"
|
||||
+ "/"
|
||||
)
|
||||
else:
|
||||
HTTP_SERVER_URL_STR = 'http://' + f'{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}' + "/"
|
||||
HTTP_SERVER_URL_STR = (
|
||||
"http://" + f"{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}" + "/"
|
||||
)
|
||||
|
||||
CSV_DATA = os.path.join(
|
||||
tempfile._get_default_tempdir(), next(tempfile._get_candidate_names())
|
||||
)
|
||||
|
||||
CSV_DATA = os.path.join(tempfile._get_default_tempdir(), next(tempfile._get_candidate_names()))
|
||||
|
||||
def get_ch_answer(query):
|
||||
host = CLICKHOUSE_HOST
|
||||
if IS_IPV6:
|
||||
host = f'[{host}]'
|
||||
host = f"[{host}]"
|
||||
|
||||
url = os.environ.get('CLICKHOUSE_URL', 'http://{host}:{port}'.format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP))
|
||||
url = os.environ.get(
|
||||
"CLICKHOUSE_URL",
|
||||
"http://{host}:{port}".format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP),
|
||||
)
|
||||
return urllib.request.urlopen(url, data=query.encode()).read().decode()
|
||||
|
||||
|
||||
def check_answers(query, answer):
|
||||
ch_answer = get_ch_answer(query)
|
||||
if ch_answer.strip() != answer.strip():
|
||||
@ -68,15 +86,16 @@ def check_answers(query, answer):
|
||||
print("Fetched answer :", ch_answer, file=sys.stderr)
|
||||
raise Exception("Fail on query")
|
||||
|
||||
|
||||
class CSVHTTPServer(BaseHTTPRequestHandler):
|
||||
def _set_headers(self):
|
||||
self.send_response(200)
|
||||
self.send_header('Content-type', 'text/csv')
|
||||
self.send_header("Content-type", "text/csv")
|
||||
self.end_headers()
|
||||
|
||||
def do_GET(self):
|
||||
self._set_headers()
|
||||
self.wfile.write(('hello, world').encode())
|
||||
self.wfile.write(("hello, world").encode())
|
||||
# with open(CSV_DATA, 'r') as fl:
|
||||
# reader = csv.reader(fl, delimiter=',')
|
||||
# for row in reader:
|
||||
@ -84,33 +103,33 @@ class CSVHTTPServer(BaseHTTPRequestHandler):
|
||||
return
|
||||
|
||||
def read_chunk(self):
|
||||
msg = ''
|
||||
msg = ""
|
||||
while True:
|
||||
sym = self.rfile.read(1)
|
||||
if sym == '':
|
||||
if sym == "":
|
||||
break
|
||||
msg += sym.decode('utf-8')
|
||||
if msg.endswith('\r\n'):
|
||||
msg += sym.decode("utf-8")
|
||||
if msg.endswith("\r\n"):
|
||||
break
|
||||
length = int(msg[:-2], 16)
|
||||
if length == 0:
|
||||
return ''
|
||||
return ""
|
||||
content = self.rfile.read(length)
|
||||
self.rfile.read(2) # read sep \r\n
|
||||
return content.decode('utf-8')
|
||||
self.rfile.read(2) # read sep \r\n
|
||||
return content.decode("utf-8")
|
||||
|
||||
def do_POST(self):
|
||||
data = ''
|
||||
data = ""
|
||||
while True:
|
||||
chunk = self.read_chunk()
|
||||
if not chunk:
|
||||
break
|
||||
data += chunk
|
||||
with StringIO(data) as fl:
|
||||
reader = csv.reader(fl, delimiter=',')
|
||||
with open(CSV_DATA, 'a') as d:
|
||||
reader = csv.reader(fl, delimiter=",")
|
||||
with open(CSV_DATA, "a") as d:
|
||||
for row in reader:
|
||||
d.write(','.join(row) + '\n')
|
||||
d.write(",".join(row) + "\n")
|
||||
self._set_headers()
|
||||
self.wfile.write(b"ok")
|
||||
|
||||
@ -121,6 +140,7 @@ class CSVHTTPServer(BaseHTTPRequestHandler):
|
||||
class HTTPServerV6(HTTPServer):
|
||||
address_family = socket.AF_INET6
|
||||
|
||||
|
||||
def start_server():
|
||||
if IS_IPV6:
|
||||
httpd = HTTPServerV6(HTTP_SERVER_ADDRESS, CSVHTTPServer)
|
||||
@ -130,57 +150,87 @@ def start_server():
|
||||
t = threading.Thread(target=httpd.serve_forever)
|
||||
return t, httpd
|
||||
|
||||
|
||||
# test section
|
||||
|
||||
def test_select(table_name="", schema="str String,numuint UInt32,numint Int32,double Float64", requests=[], answers=[], test_data=""):
|
||||
with open(CSV_DATA, 'w') as f: # clear file
|
||||
f.write('')
|
||||
|
||||
def test_select(
|
||||
table_name="",
|
||||
schema="str String,numuint UInt32,numint Int32,double Float64",
|
||||
requests=[],
|
||||
answers=[],
|
||||
test_data="",
|
||||
):
|
||||
with open(CSV_DATA, "w") as f: # clear file
|
||||
f.write("")
|
||||
|
||||
if test_data:
|
||||
with open(CSV_DATA, 'w') as f:
|
||||
with open(CSV_DATA, "w") as f:
|
||||
f.write(test_data + "\n")
|
||||
|
||||
if table_name:
|
||||
get_ch_answer("drop table if exists {}".format(table_name))
|
||||
get_ch_answer("create table {} ({}) engine=URL('{}', 'CSV')".format(table_name, schema, HTTP_SERVER_URL_STR))
|
||||
get_ch_answer(
|
||||
"create table {} ({}) engine=URL('{}', 'CSV')".format(
|
||||
table_name, schema, HTTP_SERVER_URL_STR
|
||||
)
|
||||
)
|
||||
|
||||
for i in range(len(requests)):
|
||||
tbl = table_name
|
||||
if not tbl:
|
||||
tbl = "url('{addr}', 'CSV', '{schema}')".format(addr=HTTP_SERVER_URL_STR, schema=schema)
|
||||
tbl = "url('{addr}', 'CSV', '{schema}')".format(
|
||||
addr=HTTP_SERVER_URL_STR, schema=schema
|
||||
)
|
||||
check_answers(requests[i].format(tbl=tbl), answers[i])
|
||||
|
||||
if table_name:
|
||||
get_ch_answer("drop table if exists {}".format(table_name))
|
||||
|
||||
def test_insert(table_name="", schema="str String,numuint UInt32,numint Int32,double Float64", requests_insert=[], requests_select=[], answers=[]):
|
||||
with open(CSV_DATA, 'w') as f: # flush test file
|
||||
f.write('')
|
||||
|
||||
def test_insert(
|
||||
table_name="",
|
||||
schema="str String,numuint UInt32,numint Int32,double Float64",
|
||||
requests_insert=[],
|
||||
requests_select=[],
|
||||
answers=[],
|
||||
):
|
||||
with open(CSV_DATA, "w") as f: # flush test file
|
||||
f.write("")
|
||||
|
||||
if table_name:
|
||||
get_ch_answer("drop table if exists {}".format(table_name))
|
||||
get_ch_answer("create table {} ({}) engine=URL('{}', 'CSV')".format(table_name, schema, HTTP_SERVER_URL_STR))
|
||||
get_ch_answer(
|
||||
"create table {} ({}) engine=URL('{}', 'CSV')".format(
|
||||
table_name, schema, HTTP_SERVER_URL_STR
|
||||
)
|
||||
)
|
||||
|
||||
for req in requests_insert:
|
||||
tbl = table_name
|
||||
if not tbl:
|
||||
tbl = "table function url('{addr}', 'CSV', '{schema}')".format(addr=HTTP_SERVER_URL_STR, schema=schema)
|
||||
tbl = "table function url('{addr}', 'CSV', '{schema}')".format(
|
||||
addr=HTTP_SERVER_URL_STR, schema=schema
|
||||
)
|
||||
get_ch_answer(req.format(tbl=tbl))
|
||||
|
||||
|
||||
for i in range(len(requests_select)):
|
||||
tbl = table_name
|
||||
if not tbl:
|
||||
tbl = "url('{addr}', 'CSV', '{schema}')".format(addr=HTTP_SERVER_URL_STR, schema=schema)
|
||||
tbl = "url('{addr}', 'CSV', '{schema}')".format(
|
||||
addr=HTTP_SERVER_URL_STR, schema=schema
|
||||
)
|
||||
check_answers(requests_select[i].format(tbl=tbl), answers[i])
|
||||
|
||||
if table_name:
|
||||
get_ch_answer("drop table if exists {}".format(table_name))
|
||||
|
||||
|
||||
def test_select_url_engine(requests=[], answers=[], test_data=""):
|
||||
for i in range(len(requests)):
|
||||
check_answers(requests[i], answers[i])
|
||||
|
||||
|
||||
def main():
|
||||
test_data = "Hello,2,-2,7.7\nWorld,2,-5,8.8"
|
||||
"""
|
||||
@ -203,19 +253,29 @@ def main():
|
||||
"""
|
||||
|
||||
if IS_IPV6:
|
||||
query = "select * from url('http://guest:guest@" + f'[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}' + "/', 'RawBLOB', 'a String')"
|
||||
query = (
|
||||
"select * from url('http://guest:guest@"
|
||||
+ f"[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}"
|
||||
+ "/', 'RawBLOB', 'a String')"
|
||||
)
|
||||
else:
|
||||
query = "select * from url('http://guest:guest@" + f'{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}' + "/', 'RawBLOB', 'a String')"
|
||||
|
||||
|
||||
query = (
|
||||
"select * from url('http://guest:guest@"
|
||||
+ f"{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}"
|
||||
+ "/', 'RawBLOB', 'a String')"
|
||||
)
|
||||
|
||||
select_requests_url_auth = {
|
||||
query : 'hello, world',
|
||||
query: "hello, world",
|
||||
}
|
||||
|
||||
t, httpd = start_server()
|
||||
t.start()
|
||||
test_select(requests=list(select_requests_url_auth.keys()), answers=list(select_requests_url_auth.values()), test_data=test_data)
|
||||
test_select(
|
||||
requests=list(select_requests_url_auth.keys()),
|
||||
answers=list(select_requests_url_auth.values()),
|
||||
test_data=test_data,
|
||||
)
|
||||
httpd.shutdown()
|
||||
t.join()
|
||||
print("PASSED")
|
||||
|
@ -1,4 +1,4 @@
|
||||
#!/usr/bin/env python3
|
||||
#!/usr/bin/env python3
|
||||
import os
|
||||
import sys
|
||||
from math import sqrt, nan
|
||||
@ -8,7 +8,7 @@ import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
CURDIR = os.path.dirname(os.path.realpath(__file__))
|
||||
sys.path.insert(0, os.path.join(CURDIR, 'helpers'))
|
||||
sys.path.insert(0, os.path.join(CURDIR, "helpers"))
|
||||
|
||||
from pure_http_client import ClickHouseClient
|
||||
|
||||
@ -25,7 +25,7 @@ def twosample_proportion_ztest(s1, s2, t1, t2, alpha):
|
||||
return nan, nan, nan, nan
|
||||
z_stat = (p1 - p2) / se
|
||||
|
||||
one_side = 1 - stats.norm.cdf(abs(z_stat))
|
||||
one_side = 1 - stats.norm.cdf(abs(z_stat))
|
||||
p_value = one_side * 2
|
||||
|
||||
z = stats.norm.ppf(1 - 0.5 * alpha)
|
||||
@ -38,71 +38,171 @@ def twosample_proportion_ztest(s1, s2, t1, t2, alpha):
|
||||
def test_and_check(name, z_stat, p_value, ci_lower, ci_upper, precision=1e-2):
|
||||
client = ClickHouseClient()
|
||||
real = client.query_return_df(
|
||||
"SELECT roundBankers({}.1, 16) as z_stat, ".format(name) +
|
||||
"roundBankers({}.2, 16) as p_value, ".format(name) +
|
||||
"roundBankers({}.3, 16) as ci_lower, ".format(name) +
|
||||
"roundBankers({}.4, 16) as ci_upper ".format(name) +
|
||||
"FORMAT TabSeparatedWithNames;")
|
||||
real_z_stat = real['z_stat'][0]
|
||||
real_p_value = real['p_value'][0]
|
||||
real_ci_lower = real['ci_lower'][0]
|
||||
real_ci_upper = real['ci_upper'][0]
|
||||
assert((np.isnan(real_z_stat) and np.isnan(z_stat)) or abs(real_z_stat - np.float64(z_stat)) < precision), "clickhouse_z_stat {}, py_z_stat {}".format(real_z_stat, z_stat)
|
||||
assert((np.isnan(real_p_value) and np.isnan(p_value)) or abs(real_p_value - np.float64(p_value)) < precision), "clickhouse_p_value {}, py_p_value {}".format(real_p_value, p_value)
|
||||
assert((np.isnan(real_ci_lower) and np.isnan(ci_lower)) or abs(real_ci_lower - np.float64(ci_lower)) < precision), "clickhouse_ci_lower {}, py_ci_lower {}".format(real_ci_lower, ci_lower)
|
||||
assert((np.isnan(real_ci_upper) and np.isnan(ci_upper)) or abs(real_ci_upper - np.float64(ci_upper)) < precision), "clickhouse_ci_upper {}, py_ci_upper {}".format(real_ci_upper, ci_upper)
|
||||
"SELECT roundBankers({}.1, 16) as z_stat, ".format(name)
|
||||
+ "roundBankers({}.2, 16) as p_value, ".format(name)
|
||||
+ "roundBankers({}.3, 16) as ci_lower, ".format(name)
|
||||
+ "roundBankers({}.4, 16) as ci_upper ".format(name)
|
||||
+ "FORMAT TabSeparatedWithNames;"
|
||||
)
|
||||
real_z_stat = real["z_stat"][0]
|
||||
real_p_value = real["p_value"][0]
|
||||
real_ci_lower = real["ci_lower"][0]
|
||||
real_ci_upper = real["ci_upper"][0]
|
||||
assert (np.isnan(real_z_stat) and np.isnan(z_stat)) or abs(
|
||||
real_z_stat - np.float64(z_stat)
|
||||
) < precision, "clickhouse_z_stat {}, py_z_stat {}".format(real_z_stat, z_stat)
|
||||
assert (np.isnan(real_p_value) and np.isnan(p_value)) or abs(
|
||||
real_p_value - np.float64(p_value)
|
||||
) < precision, "clickhouse_p_value {}, py_p_value {}".format(real_p_value, p_value)
|
||||
assert (np.isnan(real_ci_lower) and np.isnan(ci_lower)) or abs(
|
||||
real_ci_lower - np.float64(ci_lower)
|
||||
) < precision, "clickhouse_ci_lower {}, py_ci_lower {}".format(
|
||||
real_ci_lower, ci_lower
|
||||
)
|
||||
assert (np.isnan(real_ci_upper) and np.isnan(ci_upper)) or abs(
|
||||
real_ci_upper - np.float64(ci_upper)
|
||||
) < precision, "clickhouse_ci_upper {}, py_ci_upper {}".format(
|
||||
real_ci_upper, ci_upper
|
||||
)
|
||||
|
||||
|
||||
def test_mean_ztest():
|
||||
counts = [0, 0]
|
||||
nobs = [0, 0]
|
||||
z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05)
|
||||
test_and_check("proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper)
|
||||
z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(10, 10, 10, 10, 0.05)
|
||||
z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(
|
||||
counts[0], counts[1], nobs[0], nobs[1], 0.05
|
||||
)
|
||||
test_and_check(
|
||||
"proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')"
|
||||
% (counts[0], counts[1], nobs[0], nobs[1]),
|
||||
z_stat,
|
||||
p_value,
|
||||
ci_lower,
|
||||
ci_upper,
|
||||
)
|
||||
z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(
|
||||
10, 10, 10, 10, 0.05
|
||||
)
|
||||
|
||||
counts = [10, 10]
|
||||
nobs = [10, 10]
|
||||
z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05)
|
||||
test_and_check("proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper)
|
||||
z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(10, 10, 10, 10, 0.05)
|
||||
z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(
|
||||
counts[0], counts[1], nobs[0], nobs[1], 0.05
|
||||
)
|
||||
test_and_check(
|
||||
"proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')"
|
||||
% (counts[0], counts[1], nobs[0], nobs[1]),
|
||||
z_stat,
|
||||
p_value,
|
||||
ci_lower,
|
||||
ci_upper,
|
||||
)
|
||||
z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(
|
||||
10, 10, 10, 10, 0.05
|
||||
)
|
||||
|
||||
counts = [16, 16]
|
||||
nobs = [16, 18]
|
||||
z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05)
|
||||
test_and_check("proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper)
|
||||
z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(
|
||||
counts[0], counts[1], nobs[0], nobs[1], 0.05
|
||||
)
|
||||
test_and_check(
|
||||
"proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')"
|
||||
% (counts[0], counts[1], nobs[0], nobs[1]),
|
||||
z_stat,
|
||||
p_value,
|
||||
ci_lower,
|
||||
ci_upper,
|
||||
)
|
||||
|
||||
counts = [10, 20]
|
||||
nobs = [30, 40]
|
||||
z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05)
|
||||
test_and_check("proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper)
|
||||
z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(
|
||||
counts[0], counts[1], nobs[0], nobs[1], 0.05
|
||||
)
|
||||
test_and_check(
|
||||
"proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')"
|
||||
% (counts[0], counts[1], nobs[0], nobs[1]),
|
||||
z_stat,
|
||||
p_value,
|
||||
ci_lower,
|
||||
ci_upper,
|
||||
)
|
||||
|
||||
counts = [20, 10]
|
||||
nobs = [40, 30]
|
||||
z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05)
|
||||
test_and_check("proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper)
|
||||
z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(
|
||||
counts[0], counts[1], nobs[0], nobs[1], 0.05
|
||||
)
|
||||
test_and_check(
|
||||
"proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')"
|
||||
% (counts[0], counts[1], nobs[0], nobs[1]),
|
||||
z_stat,
|
||||
p_value,
|
||||
ci_lower,
|
||||
ci_upper,
|
||||
)
|
||||
|
||||
counts = [randrange(10,20), randrange(10,20)]
|
||||
nobs = [randrange(counts[0] + 1, counts[0] * 2), randrange(counts[1], counts[1] * 2)]
|
||||
z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05)
|
||||
test_and_check("proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper)
|
||||
counts = [randrange(10, 20), randrange(10, 20)]
|
||||
nobs = [
|
||||
randrange(counts[0] + 1, counts[0] * 2),
|
||||
randrange(counts[1], counts[1] * 2),
|
||||
]
|
||||
z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(
|
||||
counts[0], counts[1], nobs[0], nobs[1], 0.05
|
||||
)
|
||||
test_and_check(
|
||||
"proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')"
|
||||
% (counts[0], counts[1], nobs[0], nobs[1]),
|
||||
z_stat,
|
||||
p_value,
|
||||
ci_lower,
|
||||
ci_upper,
|
||||
)
|
||||
|
||||
counts = [randrange(1,100), randrange(1,200)]
|
||||
counts = [randrange(1, 100), randrange(1, 200)]
|
||||
nobs = [randrange(counts[0], counts[0] * 2), randrange(counts[1], counts[1] * 3)]
|
||||
z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05)
|
||||
test_and_check("proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper)
|
||||
z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(
|
||||
counts[0], counts[1], nobs[0], nobs[1], 0.05
|
||||
)
|
||||
test_and_check(
|
||||
"proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')"
|
||||
% (counts[0], counts[1], nobs[0], nobs[1]),
|
||||
z_stat,
|
||||
p_value,
|
||||
ci_lower,
|
||||
ci_upper,
|
||||
)
|
||||
|
||||
counts = [randrange(1,200), randrange(1,100)]
|
||||
counts = [randrange(1, 200), randrange(1, 100)]
|
||||
nobs = [randrange(counts[0], counts[0] * 3), randrange(counts[1], counts[1] * 2)]
|
||||
z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05)
|
||||
test_and_check("proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper)
|
||||
z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(
|
||||
counts[0], counts[1], nobs[0], nobs[1], 0.05
|
||||
)
|
||||
test_and_check(
|
||||
"proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')"
|
||||
% (counts[0], counts[1], nobs[0], nobs[1]),
|
||||
z_stat,
|
||||
p_value,
|
||||
ci_lower,
|
||||
ci_upper,
|
||||
)
|
||||
|
||||
counts = [randrange(1,1000), randrange(1,1000)]
|
||||
counts = [randrange(1, 1000), randrange(1, 1000)]
|
||||
nobs = [randrange(counts[0], counts[0] * 2), randrange(counts[1], counts[1] * 2)]
|
||||
z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05)
|
||||
test_and_check("proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper)
|
||||
z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(
|
||||
counts[0], counts[1], nobs[0], nobs[1], 0.05
|
||||
)
|
||||
test_and_check(
|
||||
"proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')"
|
||||
% (counts[0], counts[1], nobs[0], nobs[1]),
|
||||
z_stat,
|
||||
p_value,
|
||||
ci_lower,
|
||||
ci_upper,
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_mean_ztest()
|
||||
print("Ok.")
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
#!/usr/bin/env python3
|
||||
#!/usr/bin/env python3
|
||||
import os
|
||||
import sys
|
||||
from statistics import variance
|
||||
@ -7,7 +7,7 @@ import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
CURDIR = os.path.dirname(os.path.realpath(__file__))
|
||||
sys.path.insert(0, os.path.join(CURDIR, 'helpers'))
|
||||
sys.path.insert(0, os.path.join(CURDIR, "helpers"))
|
||||
|
||||
from pure_http_client import ClickHouseClient
|
||||
|
||||
@ -30,46 +30,95 @@ def twosample_mean_ztest(rvs1, rvs2, alpha=0.05):
|
||||
def test_and_check(name, a, b, t_stat, p_value, ci_low, ci_high, precision=1e-2):
|
||||
client = ClickHouseClient()
|
||||
client.query("DROP TABLE IF EXISTS ztest;")
|
||||
client.query("CREATE TABLE ztest (left Float64, right UInt8) ENGINE = Memory;");
|
||||
client.query("INSERT INTO ztest VALUES {};".format(", ".join(['({},{})'.format(i, 0) for i in a])))
|
||||
client.query("INSERT INTO ztest VALUES {};".format(", ".join(['({},{})'.format(j, 1) for j in b])))
|
||||
client.query("CREATE TABLE ztest (left Float64, right UInt8) ENGINE = Memory;")
|
||||
client.query(
|
||||
"INSERT INTO ztest VALUES {};".format(
|
||||
", ".join(["({},{})".format(i, 0) for i in a])
|
||||
)
|
||||
)
|
||||
client.query(
|
||||
"INSERT INTO ztest VALUES {};".format(
|
||||
", ".join(["({},{})".format(j, 1) for j in b])
|
||||
)
|
||||
)
|
||||
real = client.query_return_df(
|
||||
"SELECT roundBankers({}(left, right).1, 16) as t_stat, ".format(name) +
|
||||
"roundBankers({}(left, right).2, 16) as p_value, ".format(name) +
|
||||
"roundBankers({}(left, right).3, 16) as ci_low, ".format(name) +
|
||||
"roundBankers({}(left, right).4, 16) as ci_high ".format(name) +
|
||||
"FROM ztest FORMAT TabSeparatedWithNames;")
|
||||
real_t_stat = real['t_stat'][0]
|
||||
real_p_value = real['p_value'][0]
|
||||
real_ci_low = real['ci_low'][0]
|
||||
real_ci_high = real['ci_high'][0]
|
||||
assert(abs(real_t_stat - np.float64(t_stat)) < precision), "clickhouse_t_stat {}, py_t_stat {}".format(real_t_stat, t_stat)
|
||||
assert(abs(real_p_value - np.float64(p_value)) < precision), "clickhouse_p_value {}, py_p_value {}".format(real_p_value, p_value)
|
||||
assert(abs(real_ci_low - np.float64(ci_low)) < precision), "clickhouse_ci_low {}, py_ci_low {}".format(real_ci_low, ci_low)
|
||||
assert(abs(real_ci_high - np.float64(ci_high)) < precision), "clickhouse_ci_high {}, py_ci_high {}".format(real_ci_high, ci_high)
|
||||
"SELECT roundBankers({}(left, right).1, 16) as t_stat, ".format(name)
|
||||
+ "roundBankers({}(left, right).2, 16) as p_value, ".format(name)
|
||||
+ "roundBankers({}(left, right).3, 16) as ci_low, ".format(name)
|
||||
+ "roundBankers({}(left, right).4, 16) as ci_high ".format(name)
|
||||
+ "FROM ztest FORMAT TabSeparatedWithNames;"
|
||||
)
|
||||
real_t_stat = real["t_stat"][0]
|
||||
real_p_value = real["p_value"][0]
|
||||
real_ci_low = real["ci_low"][0]
|
||||
real_ci_high = real["ci_high"][0]
|
||||
assert (
|
||||
abs(real_t_stat - np.float64(t_stat)) < precision
|
||||
), "clickhouse_t_stat {}, py_t_stat {}".format(real_t_stat, t_stat)
|
||||
assert (
|
||||
abs(real_p_value - np.float64(p_value)) < precision
|
||||
), "clickhouse_p_value {}, py_p_value {}".format(real_p_value, p_value)
|
||||
assert (
|
||||
abs(real_ci_low - np.float64(ci_low)) < precision
|
||||
), "clickhouse_ci_low {}, py_ci_low {}".format(real_ci_low, ci_low)
|
||||
assert (
|
||||
abs(real_ci_high - np.float64(ci_high)) < precision
|
||||
), "clickhouse_ci_high {}, py_ci_high {}".format(real_ci_high, ci_high)
|
||||
client.query("DROP TABLE IF EXISTS ztest;")
|
||||
|
||||
|
||||
def test_mean_ztest():
    rvs1 = np.round(stats.norm.rvs(loc=1, scale=5,size=500), 2)
    rvs2 = np.round(stats.norm.rvs(loc=10, scale=5,size=500), 2)
    rvs1 = np.round(stats.norm.rvs(loc=1, scale=5, size=500), 2)
    rvs2 = np.round(stats.norm.rvs(loc=10, scale=5, size=500), 2)
    s, p, cl, ch = twosample_mean_ztest(rvs1, rvs2)
    test_and_check("meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)), rvs1, rvs2, s, p, cl, ch)
    test_and_check(
        "meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)),
        rvs1,
        rvs2,
        s,
        p,
        cl,
        ch,
    )

    rvs1 = np.round(stats.norm.rvs(loc=0, scale=5,size=500), 2)
    rvs2 = np.round(stats.norm.rvs(loc=0, scale=5,size=500), 2)
    rvs1 = np.round(stats.norm.rvs(loc=0, scale=5, size=500), 2)
    rvs2 = np.round(stats.norm.rvs(loc=0, scale=5, size=500), 2)
    s, p, cl, ch = twosample_mean_ztest(rvs1, rvs2)
    test_and_check("meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)), rvs1, rvs2, s, p, cl, ch)
    test_and_check(
        "meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)),
        rvs1,
        rvs2,
        s,
        p,
        cl,
        ch,
    )

    rvs1 = np.round(stats.norm.rvs(loc=2, scale=10,size=512), 2)
    rvs2 = np.round(stats.norm.rvs(loc=5, scale=20,size=1024), 2)
    rvs1 = np.round(stats.norm.rvs(loc=2, scale=10, size=512), 2)
    rvs2 = np.round(stats.norm.rvs(loc=5, scale=20, size=1024), 2)
    s, p, cl, ch = twosample_mean_ztest(rvs1, rvs2)
    test_and_check("meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)), rvs1, rvs2, s, p, cl, ch)
    test_and_check(
        "meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)),
        rvs1,
        rvs2,
        s,
        p,
        cl,
        ch,
    )

    rvs1 = np.round(stats.norm.rvs(loc=0, scale=10,size=1024), 2)
    rvs2 = np.round(stats.norm.rvs(loc=0, scale=10,size=512), 2)
    rvs1 = np.round(stats.norm.rvs(loc=0, scale=10, size=1024), 2)
    rvs2 = np.round(stats.norm.rvs(loc=0, scale=10, size=512), 2)
    s, p, cl, ch = twosample_mean_ztest(rvs1, rvs2)
    test_and_check("meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)), rvs1, rvs2, s, p, cl, ch)
    test_and_check(
        "meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)),
        rvs1,
        rvs2,
        s,
        p,
        cl,
        ch,
    )


if __name__ == "__main__":

@ -3,47 +3,71 @@ import os
import sys

CURDIR = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(0, os.path.join(CURDIR, 'helpers'))
sys.path.insert(0, os.path.join(CURDIR, "helpers"))

CLICKHOUSE_URL = os.environ.get('CLICKHOUSE_URL')
CLICKHOUSE_TMP = os.environ.get('CLICKHOUSE_TMP')
CLICKHOUSE_URL = os.environ.get("CLICKHOUSE_URL")
CLICKHOUSE_TMP = os.environ.get("CLICKHOUSE_TMP")

from pure_http_client import ClickHouseClient

client = ClickHouseClient()


def run_test(data_format, gen_data_template, settings):
    print(data_format)
    client.query("TRUNCATE TABLE t_async_insert")

    expected = client.query(gen_data_template.format("TSV")).strip()
    data = client.query(gen_data_template.format(data_format), settings=settings,binary_result=True)
    data = client.query(
        gen_data_template.format(data_format), settings=settings, binary_result=True
    )

    insert_query = "INSERT INTO t_async_insert FORMAT {}".format(data_format)
    client.query_with_data(insert_query, data, settings=settings)

    result = client.query("SELECT * FROM t_async_insert FORMAT TSV").strip()
    if result != expected:
        print("Failed for format {}.\nExpected:\n{}\nGot:\n{}\n".format(data_format, expected, result))
        print(
            "Failed for format {}.\nExpected:\n{}\nGot:\n{}\n".format(
                data_format, expected, result
            )
        )
        exit(1)

formats = client.query("SELECT name FROM system.formats WHERE is_input AND is_output \
AND name NOT IN ('CapnProto', 'RawBLOB', 'Template', 'ProtobufSingle', 'LineAsString', 'Protobuf', 'ProtobufList') ORDER BY name").strip().split('\n')

formats = (
    client.query(
        "SELECT name FROM system.formats WHERE is_input AND is_output \
AND name NOT IN ('CapnProto', 'RawBLOB', 'Template', 'ProtobufSingle', 'LineAsString', 'Protobuf', 'ProtobufList') ORDER BY name"
    )
    .strip()
    .split("\n")
)

# Generic formats
client.query("DROP TABLE IF EXISTS t_async_insert")
client.query("CREATE TABLE t_async_insert (id UInt64, s String, arr Array(UInt64)) ENGINE = Memory")
client.query(
    "CREATE TABLE t_async_insert (id UInt64, s String, arr Array(UInt64)) ENGINE = Memory"
)
gen_data_query = "SELECT number AS id, toString(number) AS s, range(number) AS arr FROM numbers(10) FORMAT {}"

for data_format in formats:
    run_test(data_format, gen_data_query, settings={"async_insert": 1, "wait_for_async_insert": 1})
    run_test(
        data_format,
        gen_data_query,
        settings={"async_insert": 1, "wait_for_async_insert": 1},
    )

# LineAsString
client.query("DROP TABLE IF EXISTS t_async_insert")
client.query("CREATE TABLE t_async_insert (s String) ENGINE = Memory")
gen_data_query = "SELECT toString(number) AS s FROM numbers(10) FORMAT {}"

run_test('LineAsString', gen_data_query, settings={"async_insert": 1, "wait_for_async_insert": 1})
run_test(
    "LineAsString",
    gen_data_query,
    settings={"async_insert": 1, "wait_for_async_insert": 1},
)

# TODO: add CapnProto and Protobuf
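Each run_test call above round-trips generated data through one input/output format using asynchronous inserts and compares the re-read table against the TSV reference. The pure_http_client helper itself is not shown in this hunk; a hedged sketch of how such a client might forward the settings dict (here assumed to travel as URL parameters of the ClickHouse HTTP interface; the function name is illustrative) is:

import os
import requests

def http_query_sketch(sql, settings=None, binary_result=False):
    # The query text and settings such as async_insert are passed as URL parameters.
    params = dict(settings or {})
    params["query"] = sql
    resp = requests.post(
        os.environ.get("CLICKHOUSE_URL", "http://localhost:8123"), params=params
    )
    resp.raise_for_status()
    return resp.content if binary_result else resp.text
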
@ -1,6 +1,6 @@
#!/usr/bin/env python3

from http.server import SimpleHTTPRequestHandler,HTTPServer
from http.server import SimpleHTTPRequestHandler, HTTPServer
import socket
import sys
import threading
@ -17,6 +17,7 @@ def is_ipv6(host):
    except:
        return True


def get_local_port(host, ipv6):
    if ipv6:
        family = socket.AF_INET6
@ -27,20 +28,19 @@ def get_local_port(host, ipv6):
    fd.bind((host, 0))
    return fd.getsockname()[1]

CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', 'localhost')
CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123')

CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "localhost")
CLICKHOUSE_PORT_HTTP = os.environ.get("CLICKHOUSE_PORT_HTTP", "8123")

# Server returns this JSON response.
SERVER_JSON_RESPONSE = \
'''{
SERVER_JSON_RESPONSE = """{
"login": "ClickHouse",
"id": 54801242,
"name": "ClickHouse",
"company": null
}'''
}"""

EXPECTED_ANSWER = \
'''{\\n\\t"login": "ClickHouse",\\n\\t"id": 54801242,\\n\\t"name": "ClickHouse",\\n\\t"company": null\\n}'''
EXPECTED_ANSWER = """{\\n\\t"login": "ClickHouse",\\n\\t"id": 54801242,\\n\\t"name": "ClickHouse",\\n\\t"company": null\\n}"""

#####################################################################################
# This test starts an HTTP server and serves data to clickhouse url-engine based table.
@ -51,26 +51,38 @@ EXPECTED_ANSWER = \
#####################################################################################

# IP-address of this host accessible from the outside world. Get the first one
HTTP_SERVER_HOST = subprocess.check_output(['hostname', '-i']).decode('utf-8').strip().split()[0]
HTTP_SERVER_HOST = (
    subprocess.check_output(["hostname", "-i"]).decode("utf-8").strip().split()[0]
)
IS_IPV6 = is_ipv6(HTTP_SERVER_HOST)
HTTP_SERVER_PORT = get_local_port(HTTP_SERVER_HOST, IS_IPV6)

# IP address and port of the HTTP server started from this script.
HTTP_SERVER_ADDRESS = (HTTP_SERVER_HOST, HTTP_SERVER_PORT)
if IS_IPV6:
    HTTP_SERVER_URL_STR = 'http://' + f'[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}' + "/"
    HTTP_SERVER_URL_STR = (
        "http://"
        + f"[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}"
        + "/"
    )
else:
    HTTP_SERVER_URL_STR = 'http://' + f'{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}' + "/"
    HTTP_SERVER_URL_STR = (
        "http://" + f"{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}" + "/"
    )


def get_ch_answer(query):
    host = CLICKHOUSE_HOST
    if IS_IPV6:
        host = f'[{host}]'
        host = f"[{host}]"

    url = os.environ.get('CLICKHOUSE_URL', 'http://{host}:{port}'.format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP))
    url = os.environ.get(
        "CLICKHOUSE_URL",
        "http://{host}:{port}".format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP),
    )
    return urllib.request.urlopen(url, data=query.encode()).read().decode()


def check_answers(query, answer):
    ch_answer = get_ch_answer(query)
    if ch_answer.strip() != answer.strip():
@ -79,16 +91,17 @@ def check_answers(query, answer):
        print("Fetched answer :", ch_answer, file=sys.stderr)
        raise Exception("Fail on query")


# Server with check for User-Agent headers.
class HttpProcessor(SimpleHTTPRequestHandler):
    def _set_headers(self):
        user_agent = self.headers.get('User-Agent')
        if user_agent and user_agent.startswith('ClickHouse/'):
        user_agent = self.headers.get("User-Agent")
        if user_agent and user_agent.startswith("ClickHouse/"):
            self.send_response(200)
        else:
            self.send_response(403)

        self.send_header('Content-Type', 'text/csv')
        self.send_header("Content-Type", "text/csv")
        self.end_headers()

    def do_GET(self):
@ -98,9 +111,11 @@ class HttpProcessor(SimpleHTTPRequestHandler):
    def log_message(self, format, *args):
        return


class HTTPServerV6(HTTPServer):
    address_family = socket.AF_INET6


def start_server(requests_amount):
    if IS_IPV6:
        httpd = HTTPServerV6(HTTP_SERVER_ADDRESS, HttpProcessor)
@ -114,15 +129,18 @@ def start_server(requests_amount):
    t = threading.Thread(target=real_func)
    return t


#####################################################################
# Testing area.
#####################################################################


def test_select():
    global HTTP_SERVER_URL_STR
    query = 'SELECT * FROM url(\'{}\',\'JSONAsString\');'.format(HTTP_SERVER_URL_STR)
    query = "SELECT * FROM url('{}','JSONAsString');".format(HTTP_SERVER_URL_STR)
    check_answers(query, EXPECTED_ANSWER)


def main():
    # HEAD + GET
    t = start_server(3)
@ -131,6 +149,7 @@ def main():
    t.join()
    print("PASSED")


if __name__ == "__main__":
    try:
        main()
@ -141,4 +160,3 @@ if __name__ == "__main__":
        sys.stderr.flush()

        os._exit(1)

@ -122,7 +122,7 @@ class HttpProcessor(BaseHTTPRequestHandler):
    get_call_num = 0
    responses_to_get = []

    def send_head(self, from_get = False):
    def send_head(self, from_get=False):
        if self.headers["Range"] and HttpProcessor.allow_range:
            try:
                self.range = parse_byte_range(self.headers["Range"])
@ -146,7 +146,9 @@ class HttpProcessor(BaseHTTPRequestHandler):
            self.send_error(416, "Requested Range Not Satisfiable")
            return None

        retry_range_request = first != 0 and from_get is True and len(HttpProcessor.responses_to_get) > 0
        retry_range_request = (
            first != 0 and from_get is True and len(HttpProcessor.responses_to_get) > 0
        )
        if retry_range_request:
            code = HttpProcessor.responses_to_get.pop()
            if code not in HttpProcessor.responses:
@ -244,7 +246,9 @@ def run_test(allow_range, settings, check_retries=False):
        raise Exception("HTTP Range was not used when supported")

    if check_retries and len(HttpProcessor.responses_to_get) > 0:
        raise Exception("Expected to get http response 500, which had to be retried, but 200 ok returned and then retried")
        raise Exception(
            "Expected to get http response 500, which had to be retried, but 200 ok returned and then retried"
        )

    if retries_num > 0:
        expected_get_call_num += retries_num - 1
@ -263,7 +267,7 @@ def run_test(allow_range, settings, check_retries=False):


def main():
    settings = {"max_download_buffer_size" : 20}
    settings = {"max_download_buffer_size": 20}

    # Test Accept-Ranges=False
    run_test(allow_range=False, settings=settings)
@ -271,7 +275,7 @@ def main():
    run_test(allow_range=True, settings=settings)

    # Test Accept-Ranges=True, parallel download is used
    settings = {"max_download_buffer_size" : 10}
    settings = {"max_download_buffer_size": 10}
    run_test(allow_range=True, settings=settings)

    # Test Accept-Ranges=True, parallel download is not used,
@ -7,7 +7,7 @@ import pandas as pd
import numpy as np

CURDIR = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(0, os.path.join(CURDIR, 'helpers'))
sys.path.insert(0, os.path.join(CURDIR, "helpers"))

from pure_http_client import ClickHouseClient

@ -22,15 +22,22 @@ def test_and_check(rvs, n_groups, f_stat, p_value, precision=1e-2):
    client.query("DROP TABLE IF EXISTS anova;")
    client.query("CREATE TABLE anova (left Float64, right UInt64) ENGINE = Memory;")
    for group in range(n_groups):
        client.query(f'''INSERT INTO anova VALUES {", ".join([f'({i},{group})' for i in rvs[group]])};''')
        client.query(
            f"""INSERT INTO anova VALUES {", ".join([f'({i},{group})' for i in rvs[group]])};"""
        )

    real = client.query_return_df(
        '''SELECT roundBankers(a.1, 16) as f_stat, roundBankers(a.2, 16) as p_value FROM (SELECT anova(left, right) as a FROM anova) FORMAT TabSeparatedWithNames;''')
        """SELECT roundBankers(a.1, 16) as f_stat, roundBankers(a.2, 16) as p_value FROM (SELECT anova(left, right) as a FROM anova) FORMAT TabSeparatedWithNames;"""
    )

    real_f_stat = real['f_stat'][0]
    real_p_value = real['p_value'][0]
    assert(abs(real_f_stat - np.float64(f_stat)) < precision), f"clickhouse_f_stat {real_f_stat}, py_f_stat {f_stat}"
    assert(abs(real_p_value - np.float64(p_value)) < precision), f"clickhouse_p_value {real_p_value}, py_p_value {p_value}"
    real_f_stat = real["f_stat"][0]
    real_p_value = real["p_value"][0]
    assert (
        abs(real_f_stat - np.float64(f_stat)) < precision
    ), f"clickhouse_f_stat {real_f_stat}, py_f_stat {f_stat}"
    assert (
        abs(real_p_value - np.float64(p_value)) < precision
    ), f"clickhouse_p_value {real_p_value}, py_p_value {p_value}"
    client.query("DROP TABLE IF EXISTS anova;")

@ -123,10 +123,14 @@ Uses FinishSortingTransform: {}
|
||||
|
||||
for query in queries:
|
||||
check_query(query["where"], query["order_by"], query["optimize"], False)
|
||||
check_query(query["where"], query["order_by"] + ["e"], query["optimize"], query["optimize"])
|
||||
check_query(
|
||||
query["where"], query["order_by"] + ["e"], query["optimize"], query["optimize"]
|
||||
)
|
||||
|
||||
where_columns = [f"bitNot({col})" for col in query["where"]]
|
||||
check_query(where_columns, query["order_by"], query["optimize"], False)
|
||||
check_query(where_columns, query["order_by"] + ["e"], query["optimize"], query["optimize"])
|
||||
check_query(
|
||||
where_columns, query["order_by"] + ["e"], query["optimize"], query["optimize"]
|
||||
)
|
||||
|
||||
print("OK")
|
||||
|
@ -8,8 +8,8 @@ TRANSFER_ENCODING_HEADER = "Transfer-Encoding"
|
||||
|
||||
|
||||
def main():
|
||||
host = os.environ['CLICKHOUSE_HOST']
|
||||
port = int(os.environ['CLICKHOUSE_PORT_HTTP'])
|
||||
host = os.environ["CLICKHOUSE_HOST"]
|
||||
port = int(os.environ["CLICKHOUSE_PORT_HTTP"])
|
||||
|
||||
sock = socket(AF_INET, SOCK_STREAM)
|
||||
sock.connect((host, port))
|
||||
@ -47,4 +47,3 @@ def main():
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
|
@ -5,9 +5,10 @@ import os
|
||||
import uuid
|
||||
import json
|
||||
|
||||
CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', '127.0.0.1')
|
||||
CLICKHOUSE_PORT = int(os.environ.get('CLICKHOUSE_PORT_TCP', '900000'))
|
||||
CLICKHOUSE_DATABASE = os.environ.get('CLICKHOUSE_DATABASE', 'default')
|
||||
CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "127.0.0.1")
|
||||
CLICKHOUSE_PORT = int(os.environ.get("CLICKHOUSE_PORT_TCP", "900000"))
|
||||
CLICKHOUSE_DATABASE = os.environ.get("CLICKHOUSE_DATABASE", "default")
|
||||
|
||||
|
||||
def writeVarUInt(x, ba):
|
||||
for _ in range(0, 9):
|
||||
@ -24,12 +25,12 @@ def writeVarUInt(x, ba):
|
||||
|
||||
|
||||
def writeStringBinary(s, ba):
|
||||
b = bytes(s, 'utf-8')
|
||||
b = bytes(s, "utf-8")
|
||||
writeVarUInt(len(s), ba)
|
||||
ba.extend(b)
|
||||
|
||||
|
||||
def readStrict(s, size = 1):
|
||||
def readStrict(s, size=1):
|
||||
res = bytearray()
|
||||
while size:
|
||||
cur = s.recv(size)
|
||||
@ -48,18 +49,23 @@ def readUInt(s, size=1):
|
||||
val += res[i] << (i * 8)
|
||||
return val
|
||||
|
||||
|
||||
def readUInt8(s):
|
||||
return readUInt(s)
|
||||
|
||||
|
||||
def readUInt16(s):
|
||||
return readUInt(s, 2)
|
||||
|
||||
|
||||
def readUInt32(s):
|
||||
return readUInt(s, 4)
|
||||
|
||||
|
||||
def readUInt64(s):
|
||||
return readUInt(s, 8)
|
||||
|
||||
|
||||
def readVarUInt(s):
|
||||
x = 0
|
||||
for i in range(9):
|
||||
@ -75,25 +81,25 @@ def readVarUInt(s):
|
||||
def readStringBinary(s):
|
||||
size = readVarUInt(s)
|
||||
s = readStrict(s, size)
|
||||
return s.decode('utf-8')
|
||||
return s.decode("utf-8")
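The native protocol length-prefixes every string with a variable-length unsigned integer: 7 payload bits per byte, with the high bit as a continuation flag. A standalone sketch of that encoding, equivalent to the writeVarUInt/readVarUInt helpers above but working on bytes instead of a socket:

def write_var_uint(x: int) -> bytes:
    # Emit 7 bits at a time, setting the top bit while more bytes follow.
    out = bytearray()
    for _ in range(9):
        byte = x & 0x7F
        if x > 0x7F:
            byte |= 0x80
        out.append(byte)
        x >>= 7
        if x == 0:
            break
    return bytes(out)

def read_var_uint(data: bytes) -> int:
    x = 0
    for i, byte in enumerate(data):
        x |= (byte & 0x7F) << (7 * i)
        if not byte & 0x80:
            break
    return x

assert read_var_uint(write_var_uint(300)) == 300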
|
||||
|
||||
|
||||
def sendHello(s):
|
||||
ba = bytearray()
|
||||
writeVarUInt(0, ba) # Hello
|
||||
writeStringBinary('simple native protocol', ba)
|
||||
writeVarUInt(0, ba) # Hello
|
||||
writeStringBinary("simple native protocol", ba)
|
||||
writeVarUInt(21, ba)
|
||||
writeVarUInt(9, ba)
|
||||
writeVarUInt(54449, ba)
|
||||
writeStringBinary(CLICKHOUSE_DATABASE, ba) # database
|
||||
writeStringBinary('default', ba) # user
|
||||
writeStringBinary('', ba) # pwd
|
||||
writeStringBinary(CLICKHOUSE_DATABASE, ba) # database
|
||||
writeStringBinary("default", ba) # user
|
||||
writeStringBinary("", ba) # pwd
|
||||
s.sendall(ba)
|
||||
|
||||
|
||||
def receiveHello(s):
|
||||
p_type = readVarUInt(s)
|
||||
assert (p_type == 0) # Hello
|
||||
assert p_type == 0 # Hello
|
||||
server_name = readStringBinary(s)
|
||||
# print("Server name: ", server_name)
|
||||
server_version_major = readVarUInt(s)
|
||||
@ -111,65 +117,65 @@ def receiveHello(s):
|
||||
|
||||
|
||||
def serializeClientInfo(ba, query_id):
|
||||
writeStringBinary('default', ba) # initial_user
|
||||
writeStringBinary(query_id, ba) # initial_query_id
|
||||
writeStringBinary('127.0.0.1:9000', ba) # initial_address
|
||||
ba.extend([0] * 8) # initial_query_start_time_microseconds
|
||||
ba.append(1) # TCP
|
||||
writeStringBinary('os_user', ba) # os_user
|
||||
writeStringBinary('client_hostname', ba) # client_hostname
|
||||
writeStringBinary('client_name', ba) # client_name
|
||||
writeStringBinary("default", ba) # initial_user
|
||||
writeStringBinary(query_id, ba) # initial_query_id
|
||||
writeStringBinary("127.0.0.1:9000", ba) # initial_address
|
||||
ba.extend([0] * 8) # initial_query_start_time_microseconds
|
||||
ba.append(1) # TCP
|
||||
writeStringBinary("os_user", ba) # os_user
|
||||
writeStringBinary("client_hostname", ba) # client_hostname
|
||||
writeStringBinary("client_name", ba) # client_name
|
||||
writeVarUInt(21, ba)
|
||||
writeVarUInt(9, ba)
|
||||
writeVarUInt(54449, ba)
|
||||
writeStringBinary('', ba) # quota_key
|
||||
writeVarUInt(0, ba) # distributed_depth
|
||||
writeVarUInt(1, ba) # client_version_patch
|
||||
ba.append(0) # No telemetry
|
||||
writeStringBinary("", ba) # quota_key
|
||||
writeVarUInt(0, ba) # distributed_depth
|
||||
writeVarUInt(1, ba) # client_version_patch
|
||||
ba.append(0) # No telemetry
|
||||
|
||||
|
||||
def sendQuery(s, query):
|
||||
ba = bytearray()
|
||||
query_id = uuid.uuid4().hex
|
||||
writeVarUInt(1, ba) # query
|
||||
writeVarUInt(1, ba) # query
|
||||
writeStringBinary(query_id, ba)
|
||||
|
||||
ba.append(1) # INITIAL_QUERY
|
||||
ba.append(1) # INITIAL_QUERY
|
||||
|
||||
# client info
|
||||
serializeClientInfo(ba, query_id)
|
||||
|
||||
writeStringBinary('', ba) # No settings
|
||||
writeStringBinary('', ba) # No interserver secret
|
||||
writeVarUInt(2, ba) # Stage - Complete
|
||||
ba.append(0) # No compression
|
||||
writeStringBinary(query, ba) # query, finally
|
||||
writeStringBinary("", ba) # No settings
|
||||
writeStringBinary("", ba) # No interserver secret
|
||||
writeVarUInt(2, ba) # Stage - Complete
|
||||
ba.append(0) # No compression
|
||||
writeStringBinary(query, ba) # query, finally
|
||||
s.sendall(ba)
|
||||
|
||||
|
||||
def serializeBlockInfo(ba):
|
||||
writeVarUInt(1, ba) # 1
|
||||
ba.append(0) # is_overflows
|
||||
writeVarUInt(2, ba) # 2
|
||||
writeVarUInt(0, ba) # 0
|
||||
ba.extend([0] * 4) # bucket_num
|
||||
writeVarUInt(1, ba) # 1
|
||||
ba.append(0) # is_overflows
|
||||
writeVarUInt(2, ba) # 2
|
||||
writeVarUInt(0, ba) # 0
|
||||
ba.extend([0] * 4) # bucket_num
|
||||
|
||||
|
||||
def sendEmptyBlock(s):
|
||||
ba = bytearray()
|
||||
writeVarUInt(2, ba) # Data
|
||||
writeStringBinary('', ba)
|
||||
writeVarUInt(2, ba) # Data
|
||||
writeStringBinary("", ba)
|
||||
serializeBlockInfo(ba)
|
||||
writeVarUInt(0, ba) # rows
|
||||
writeVarUInt(0, ba) # columns
|
||||
writeVarUInt(0, ba) # rows
|
||||
writeVarUInt(0, ba) # columns
|
||||
s.sendall(ba)
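The helpers above form a tiny hand-rolled native-protocol client. A usage sketch, assuming those helpers are in scope (this is illustrative only; the test's actual main() appears further down and inspects the reply packets in more detail):

import socket

def run_query_sketch(query):
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.settimeout(30)
    s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT))
    sendHello(s)          # client hello: protocol revision, database, credentials
    receiveHello(s)       # server hello: name, version, ...
    sendQuery(s, query)   # query packet with serialized client info and settings
    sendEmptyBlock(s)     # empty data block signals "no external tables"
    progress = readProgress(s)  # then read Progress packets as defined below
    s.close()
    return progress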
|
||||
|
||||
|
||||
def assertPacket(packet, expected):
|
||||
assert(packet == expected), packet
|
||||
assert packet == expected, packet
|
||||
|
||||
|
||||
class Progress():
|
||||
class Progress:
|
||||
def __init__(self):
|
||||
# NOTE: this is done in ctor to initialize __dict__
|
||||
self.read_rows = 0
|
||||
@ -198,11 +204,12 @@ class Progress():
|
||||
|
||||
def __bool__(self):
|
||||
return (
|
||||
self.read_rows > 0 or
|
||||
self.read_bytes > 0 or
|
||||
self.total_rows_to_read > 0 or
|
||||
self.written_rows > 0 or
|
||||
self.written_bytes > 0)
|
||||
self.read_rows > 0
|
||||
or self.read_bytes > 0
|
||||
or self.total_rows_to_read > 0
|
||||
or self.written_rows > 0
|
||||
or self.written_bytes > 0
|
||||
)
|
||||
|
||||
|
||||
def readProgress(s):
|
||||
@ -219,13 +226,14 @@ def readProgress(s):
|
||||
progress.readPacket(s)
|
||||
return progress
|
||||
|
||||
|
||||
def readException(s):
|
||||
code = readUInt32(s)
|
||||
name = readStringBinary(s)
|
||||
text = readStringBinary(s)
|
||||
readStringBinary(s) # trace
|
||||
assertPacket(readUInt8(s), 0) # has_nested
|
||||
return "code {}: {}".format(code, text.replace('DB::Exception:', ''))
|
||||
readStringBinary(s) # trace
|
||||
assertPacket(readUInt8(s), 0) # has_nested
|
||||
return "code {}: {}".format(code, text.replace("DB::Exception:", ""))
|
||||
|
||||
|
||||
def main():
|
||||
@ -236,7 +244,10 @@ def main():
|
||||
receiveHello(s)
|
||||
# For a 1 second sleep and 1000ms of interactive_delay we should definitely get a non-zero progress packet.
|
||||
# NOTE: interactive_delay=0 cannot be used since in this case CompletedPipelineExecutor will not call cancelled callback.
|
||||
sendQuery(s, "insert into function null('_ Int') select sleep(1) from numbers(2) settings max_block_size=1, interactive_delay=1000")
|
||||
sendQuery(
|
||||
s,
|
||||
"insert into function null('_ Int') select sleep(1) from numbers(2) settings max_block_size=1, interactive_delay=1000",
|
||||
)
|
||||
|
||||
# external tables
|
||||
sendEmptyBlock(s)
|
||||
|
@ -4,18 +4,19 @@ import os
|
||||
import sys
|
||||
|
||||
CURDIR = os.path.dirname(os.path.realpath(__file__))
|
||||
sys.path.insert(0, os.path.join(CURDIR, 'helpers'))
|
||||
sys.path.insert(0, os.path.join(CURDIR, "helpers"))
|
||||
|
||||
from pure_http_client import ClickHouseClient
|
||||
|
||||
|
||||
class Tester:
|
||||
'''
|
||||
"""
|
||||
- Creates test table
|
||||
- Deletes the specified range of rows
|
||||
- Masks another range using row-level policy
|
||||
- Runs some read queries and checks that the results are correct
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, session, url, index_granularity, total_rows):
|
||||
self.session = session
|
||||
self.url = url
|
||||
@ -25,10 +26,10 @@ class Tester:
|
||||
self.repro_queries = []
|
||||
|
||||
def report_error(self):
|
||||
print('Repro steps:', '\n\n\t'.join(self.repro_queries))
|
||||
print("Repro steps:", "\n\n\t".join(self.repro_queries))
|
||||
exit(1)
|
||||
|
||||
def query(self, query_text, include_in_repro_steps = True, expected_data = None):
|
||||
def query(self, query_text, include_in_repro_steps=True, expected_data=None):
|
||||
self.repro_queries.append(query_text)
|
||||
resp = self.session.post(self.url, data=query_text)
|
||||
if resp.status_code != 200:
|
||||
@ -36,113 +37,187 @@ class Tester:
|
||||
error = resp.text[0:40]
|
||||
if error not in self.reported_errors:
|
||||
self.reported_errors.add(error)
|
||||
print('Code:', resp.status_code)
|
||||
print('Result:', resp.text)
|
||||
print("Code:", resp.status_code)
|
||||
print("Result:", resp.text)
|
||||
self.report_error()
|
||||
|
||||
result = resp.text
|
||||
# Check that the result is as expected
|
||||
if ((not expected_data is None) and (int(result) != len(expected_data))):
|
||||
print('Expected {} rows, got {}'.format(len(expected_data), result))
|
||||
print('Expected data:' + str(expected_data))
|
||||
if (not expected_data is None) and (int(result) != len(expected_data)):
|
||||
print("Expected {} rows, got {}".format(len(expected_data), result))
|
||||
print("Expected data:" + str(expected_data))
|
||||
self.report_error()
|
||||
|
||||
if not include_in_repro_steps:
|
||||
self.repro_queries.pop()
|
||||
|
||||
|
||||
def check_data(self, all_data, delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end):
|
||||
def check_data(
|
||||
self,
|
||||
all_data,
|
||||
delete_range_start,
|
||||
delete_range_end,
|
||||
row_level_policy_range_start,
|
||||
row_level_policy_range_end,
|
||||
):
|
||||
all_data_after_delete = all_data[
|
||||
~((all_data.a == 0) &
|
||||
(all_data.b > delete_range_start) &
|
||||
(all_data.b <= delete_range_end))]
|
||||
~(
|
||||
(all_data.a == 0)
|
||||
& (all_data.b > delete_range_start)
|
||||
& (all_data.b <= delete_range_end)
|
||||
)
|
||||
]
|
||||
all_data_after_row_policy = all_data_after_delete[
|
||||
(all_data_after_delete.b <= row_level_policy_range_start) |
|
||||
(all_data_after_delete.b > row_level_policy_range_end)]
|
||||
(all_data_after_delete.b <= row_level_policy_range_start)
|
||||
| (all_data_after_delete.b > row_level_policy_range_end)
|
||||
]
|
||||
|
||||
for to_select in ['count()', 'sum(d)']: # Test reading with and without column with default value
|
||||
self.query('SELECT {} FROM tab_02473;'.format(to_select), False, all_data_after_row_policy)
|
||||
for to_select in [
|
||||
"count()",
|
||||
"sum(d)",
|
||||
]: # Test reading with and without column with default value
|
||||
self.query(
|
||||
"SELECT {} FROM tab_02473;".format(to_select),
|
||||
False,
|
||||
all_data_after_row_policy,
|
||||
)
|
||||
|
||||
delta = 10
|
||||
for query_range_start in [0, delta]:
|
||||
for query_range_end in [self.total_rows - delta]: #, self.total_rows]:
|
||||
for query_range_end in [self.total_rows - delta]: # , self.total_rows]:
|
||||
expected = all_data_after_row_policy[
|
||||
(all_data_after_row_policy.a == 0) &
|
||||
(all_data_after_row_policy.b > query_range_start) &
|
||||
(all_data_after_row_policy.b <= query_range_end)]
|
||||
self.query('SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} WHERE a == 0;'.format(
|
||||
to_select, query_range_start, query_range_end), False, expected)
|
||||
(all_data_after_row_policy.a == 0)
|
||||
& (all_data_after_row_policy.b > query_range_start)
|
||||
& (all_data_after_row_policy.b <= query_range_end)
|
||||
]
|
||||
self.query(
|
||||
"SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} WHERE a == 0;".format(
|
||||
to_select, query_range_start, query_range_end
|
||||
),
|
||||
False,
|
||||
expected,
|
||||
)
|
||||
|
||||
expected = all_data_after_row_policy[
|
||||
(all_data_after_row_policy.a == 0) &
|
||||
(all_data_after_row_policy.c > query_range_start) &
|
||||
(all_data_after_row_policy.c <= query_range_end)]
|
||||
self.query('SELECT {} from tab_02473 PREWHERE c > {} AND c <= {} WHERE a == 0;'.format(
|
||||
to_select, query_range_start, query_range_end), False, expected)
|
||||
(all_data_after_row_policy.a == 0)
|
||||
& (all_data_after_row_policy.c > query_range_start)
|
||||
& (all_data_after_row_policy.c <= query_range_end)
|
||||
]
|
||||
self.query(
|
||||
"SELECT {} from tab_02473 PREWHERE c > {} AND c <= {} WHERE a == 0;".format(
|
||||
to_select, query_range_start, query_range_end
|
||||
),
|
||||
False,
|
||||
expected,
|
||||
)
|
||||
|
||||
expected = all_data_after_row_policy[
|
||||
(all_data_after_row_policy.a == 0) &
|
||||
((all_data_after_row_policy.c <= query_range_start) |
|
||||
(all_data_after_row_policy.c > query_range_end))]
|
||||
self.query('SELECT {} from tab_02473 PREWHERE c <= {} OR c > {} WHERE a == 0;'.format(
|
||||
to_select, query_range_start, query_range_end), False, expected)
|
||||
(all_data_after_row_policy.a == 0)
|
||||
& (
|
||||
(all_data_after_row_policy.c <= query_range_start)
|
||||
| (all_data_after_row_policy.c > query_range_end)
|
||||
)
|
||||
]
|
||||
self.query(
|
||||
"SELECT {} from tab_02473 PREWHERE c <= {} OR c > {} WHERE a == 0;".format(
|
||||
to_select, query_range_start, query_range_end
|
||||
),
|
||||
False,
|
||||
expected,
|
||||
)
|
||||
|
||||
|
||||
def run_test(self, delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end):
|
||||
def run_test(
|
||||
self,
|
||||
delete_range_start,
|
||||
delete_range_end,
|
||||
row_level_policy_range_start,
|
||||
row_level_policy_range_end,
|
||||
):
|
||||
self.repro_queries = []
|
||||
|
||||
self.query('''
|
||||
self.query(
|
||||
"""
|
||||
CREATE TABLE tab_02473 (a Int8, b Int32, c Int32, PRIMARY KEY (a))
|
||||
ENGINE = MergeTree() ORDER BY (a, b)
|
||||
SETTINGS min_bytes_for_wide_part = 0, index_granularity = {};'''.format(self.index_granularity))
|
||||
SETTINGS min_bytes_for_wide_part = 0, index_granularity = {};""".format(
|
||||
self.index_granularity
|
||||
)
|
||||
)
|
||||
|
||||
self.query('INSERT INTO tab_02473 select 0, number+1, number+1 FROM numbers({});'.format(self.total_rows))
|
||||
self.query(
|
||||
"INSERT INTO tab_02473 select 0, number+1, number+1 FROM numbers({});".format(
|
||||
self.total_rows
|
||||
)
|
||||
)
|
||||
|
||||
client = ClickHouseClient()
|
||||
all_data = client.query_return_df("SELECT a, b, c, 1 as d FROM tab_02473 FORMAT TabSeparatedWithNames;")
|
||||
all_data = client.query_return_df(
|
||||
"SELECT a, b, c, 1 as d FROM tab_02473 FORMAT TabSeparatedWithNames;"
|
||||
)
|
||||
|
||||
self.query('OPTIMIZE TABLE tab_02473 FINAL SETTINGS mutations_sync=2;')
|
||||
self.query("OPTIMIZE TABLE tab_02473 FINAL SETTINGS mutations_sync=2;")
|
||||
|
||||
# After all data has been written add a column with default value
|
||||
self.query('ALTER TABLE tab_02473 ADD COLUMN d Int64 DEFAULT 1;')
|
||||
self.query("ALTER TABLE tab_02473 ADD COLUMN d Int64 DEFAULT 1;")
|
||||
|
||||
self.check_data(all_data, -100, -100, -100, -100)
|
||||
|
||||
self.query('DELETE FROM tab_02473 WHERE a = 0 AND b > {} AND b <= {};'.format(
|
||||
delete_range_start, delete_range_end))
|
||||
self.query(
|
||||
"DELETE FROM tab_02473 WHERE a = 0 AND b > {} AND b <= {};".format(
|
||||
delete_range_start, delete_range_end
|
||||
)
|
||||
)
|
||||
|
||||
self.check_data(all_data, delete_range_start, delete_range_end, -100, -100)
|
||||
|
||||
self.query('CREATE ROW POLICY policy_tab_02473 ON tab_02473 FOR SELECT USING b <= {} OR b > {} TO default;'.format(
|
||||
row_level_policy_range_start, row_level_policy_range_end))
|
||||
self.query(
|
||||
"CREATE ROW POLICY policy_tab_02473 ON tab_02473 FOR SELECT USING b <= {} OR b > {} TO default;".format(
|
||||
row_level_policy_range_start, row_level_policy_range_end
|
||||
)
|
||||
)
|
||||
|
||||
self.check_data(all_data, delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end)
|
||||
self.check_data(
|
||||
all_data,
|
||||
delete_range_start,
|
||||
delete_range_end,
|
||||
row_level_policy_range_start,
|
||||
row_level_policy_range_end,
|
||||
)
|
||||
|
||||
self.query('DROP POLICY policy_tab_02473 ON tab_02473;')
|
||||
|
||||
self.query('DROP TABLE tab_02473;')
|
||||
self.query("DROP POLICY policy_tab_02473 ON tab_02473;")
|
||||
|
||||
self.query("DROP TABLE tab_02473;")
|
||||
|
||||
|
||||
def main():
|
||||
# Set mutations to synchronous mode and enable lightweight DELETEs
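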
|
||||
url = os.environ['CLICKHOUSE_URL'] + '&max_threads=1'
|
||||
url = os.environ["CLICKHOUSE_URL"] + "&max_threads=1"
|
||||
|
||||
default_index_granularity = 10;
|
||||
default_index_granularity = 10
|
||||
total_rows = 8 * default_index_granularity
|
||||
step = default_index_granularity
|
||||
session = requests.Session()
|
||||
for index_granularity in [default_index_granularity-1, default_index_granularity]: # [default_index_granularity-1, default_index_granularity+1, default_index_granularity]:
|
||||
for index_granularity in [
|
||||
default_index_granularity - 1,
|
||||
default_index_granularity,
|
||||
]: # [default_index_granularity-1, default_index_granularity+1, default_index_granularity]:
|
||||
tester = Tester(session, url, index_granularity, total_rows)
|
||||
# Test combinations of ranges of various size masked by lightweight DELETES
|
||||
# along with ranges of various size masked by row-level policies
|
||||
for delete_range_start in range(0, total_rows, 3 * step):
|
||||
for delete_range_end in range(delete_range_start + 3 * step, total_rows, 2 * step):
|
||||
for delete_range_end in range(
|
||||
delete_range_start + 3 * step, total_rows, 2 * step
|
||||
):
|
||||
for row_level_policy_range_start in range(0, total_rows, 3 * step):
|
||||
for row_level_policy_range_end in range(row_level_policy_range_start + 3 * step, total_rows, 2 * step):
|
||||
tester.run_test(delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end)
|
||||
for row_level_policy_range_end in range(
|
||||
row_level_policy_range_start + 3 * step, total_rows, 2 * step
|
||||
):
|
||||
tester.run_test(
|
||||
delete_range_start,
|
||||
delete_range_end,
|
||||
row_level_policy_range_start,
|
||||
row_level_policy_range_end,
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
|
@ -4,16 +4,17 @@ import os
|
||||
import sys
|
||||
|
||||
CURDIR = os.path.dirname(os.path.realpath(__file__))
|
||||
sys.path.insert(0, os.path.join(CURDIR, 'helpers'))
|
||||
sys.path.insert(0, os.path.join(CURDIR, "helpers"))
|
||||
|
||||
from pure_http_client import ClickHouseClient
|
||||
|
||||
|
||||
class Tester:
|
||||
'''
|
||||
"""
|
||||
- Creates test table with multiple integer columns
|
||||
- Runs read queries with multiple range conditions on different columns in PREWHERE and check that the result is correct
|
||||
'''
|
||||
"""
|
||||
|
||||
def __init__(self, session, url, index_granularity, total_rows):
|
||||
self.session = session
|
||||
self.url = url
|
||||
@ -23,10 +24,10 @@ class Tester:
|
||||
self.repro_queries = []
|
||||
|
||||
def report_error(self):
|
||||
print('Repro steps:', '\n\n\t'.join(self.repro_queries))
|
||||
print("Repro steps:", "\n\n\t".join(self.repro_queries))
|
||||
exit(1)
|
||||
|
||||
def query(self, query_text, include_in_repro_steps = True, expected_data = None):
|
||||
def query(self, query_text, include_in_repro_steps=True, expected_data=None):
|
||||
self.repro_queries.append(query_text)
|
||||
resp = self.session.post(self.url, data=query_text)
|
||||
if resp.status_code != 200:
|
||||
@ -34,98 +35,150 @@ class Tester:
|
||||
error = resp.text[0:40]
|
||||
if error not in self.reported_errors:
|
||||
self.reported_errors.add(error)
|
||||
print('Code:', resp.status_code)
|
||||
print('Result:', resp.text)
|
||||
print("Code:", resp.status_code)
|
||||
print("Result:", resp.text)
|
||||
self.report_error()
|
||||
|
||||
result = resp.text
|
||||
# Check that the result is as expected
|
||||
if ((not expected_data is None) and (int(result) != len(expected_data))):
|
||||
print('Expected {} rows, got {}'.format(len(expected_data), result))
|
||||
print('Expected data:' + str(expected_data))
|
||||
if (not expected_data is None) and (int(result) != len(expected_data)):
|
||||
print("Expected {} rows, got {}".format(len(expected_data), result))
|
||||
print("Expected data:" + str(expected_data))
|
||||
self.report_error()
|
||||
|
||||
if not include_in_repro_steps:
|
||||
self.repro_queries.pop()
|
||||
|
||||
|
||||
def check_data(self, all_data, c_range_start, c_range_end, d_range_start, d_range_end):
|
||||
for to_select in ['count()', 'sum(e)']: # Test reading with and without column with default value
|
||||
self.query('SELECT {} FROM tab_02473;'.format(to_select), False, all_data)
|
||||
def check_data(
|
||||
self, all_data, c_range_start, c_range_end, d_range_start, d_range_end
|
||||
):
|
||||
for to_select in [
|
||||
"count()",
|
||||
"sum(e)",
|
||||
]: # Test reading with and without column with default value
|
||||
self.query("SELECT {} FROM tab_02473;".format(to_select), False, all_data)
|
||||
|
||||
delta = 10
|
||||
for b_range_start in [0, delta]:
|
||||
for b_range_end in [self.total_rows - delta]: #, self.total_rows]:
|
||||
for b_range_end in [self.total_rows - delta]: # , self.total_rows]:
|
||||
expected = all_data[
|
||||
(all_data.a == 0) &
|
||||
(all_data.b > b_range_start) &
|
||||
(all_data.b <= b_range_end)]
|
||||
self.query('SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} WHERE a == 0;'.format(
|
||||
to_select, b_range_start, b_range_end), False, expected)
|
||||
(all_data.a == 0)
|
||||
& (all_data.b > b_range_start)
|
||||
& (all_data.b <= b_range_end)
|
||||
]
|
||||
self.query(
|
||||
"SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} WHERE a == 0;".format(
|
||||
to_select, b_range_start, b_range_end
|
||||
),
|
||||
False,
|
||||
expected,
|
||||
)
|
||||
|
||||
expected = all_data[
|
||||
(all_data.a == 0) &
|
||||
(all_data.b > b_range_start) &
|
||||
(all_data.b <= b_range_end) &
|
||||
(all_data.c > c_range_start) &
|
||||
(all_data.c <= c_range_end)]
|
||||
self.query('SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} AND c > {} AND c <= {} WHERE a == 0;'.format(
|
||||
to_select, b_range_start, b_range_end, c_range_start, c_range_end), False, expected)
|
||||
(all_data.a == 0)
|
||||
& (all_data.b > b_range_start)
|
||||
& (all_data.b <= b_range_end)
|
||||
& (all_data.c > c_range_start)
|
||||
& (all_data.c <= c_range_end)
|
||||
]
|
||||
self.query(
|
||||
"SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} AND c > {} AND c <= {} WHERE a == 0;".format(
|
||||
to_select,
|
||||
b_range_start,
|
||||
b_range_end,
|
||||
c_range_start,
|
||||
c_range_end,
|
||||
),
|
||||
False,
|
||||
expected,
|
||||
)
|
||||
|
||||
expected = all_data[
|
||||
(all_data.a == 0) &
|
||||
(all_data.b > b_range_start) &
|
||||
(all_data.b <= b_range_end) &
|
||||
(all_data.c > c_range_start) &
|
||||
(all_data.c <= c_range_end) &
|
||||
(all_data.d > d_range_start) &
|
||||
(all_data.d <= d_range_end)]
|
||||
self.query('SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} AND c > {} AND c <= {} AND d > {} AND d <= {} WHERE a == 0;'.format(
|
||||
to_select, b_range_start, b_range_end, c_range_start, c_range_end, d_range_start, d_range_end), False, expected)
|
||||
|
||||
(all_data.a == 0)
|
||||
& (all_data.b > b_range_start)
|
||||
& (all_data.b <= b_range_end)
|
||||
& (all_data.c > c_range_start)
|
||||
& (all_data.c <= c_range_end)
|
||||
& (all_data.d > d_range_start)
|
||||
& (all_data.d <= d_range_end)
|
||||
]
|
||||
self.query(
|
||||
"SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} AND c > {} AND c <= {} AND d > {} AND d <= {} WHERE a == 0;".format(
|
||||
to_select,
|
||||
b_range_start,
|
||||
b_range_end,
|
||||
c_range_start,
|
||||
c_range_end,
|
||||
d_range_start,
|
||||
d_range_end,
|
||||
),
|
||||
False,
|
||||
expected,
|
||||
)
|
||||
|
||||
def run_test(self, c_range_start, c_range_end, d_range_start, d_range_end):
|
||||
self.repro_queries = []
|
||||
|
||||
self.query('''
|
||||
self.query(
|
||||
"""
|
||||
CREATE TABLE tab_02473 (a Int8, b Int32, c Int32, d Int32, PRIMARY KEY (a))
|
||||
ENGINE = MergeTree() ORDER BY (a, b)
|
||||
SETTINGS min_bytes_for_wide_part = 0, index_granularity = {};'''.format(self.index_granularity))
|
||||
SETTINGS min_bytes_for_wide_part = 0, index_granularity = {};""".format(
|
||||
self.index_granularity
|
||||
)
|
||||
)
|
||||
|
||||
self.query('INSERT INTO tab_02473 select 0, number+1, number+1, number+1 FROM numbers({});'.format(self.total_rows))
|
||||
self.query(
|
||||
"INSERT INTO tab_02473 select 0, number+1, number+1, number+1 FROM numbers({});".format(
|
||||
self.total_rows
|
||||
)
|
||||
)
|
||||
|
||||
client = ClickHouseClient()
|
||||
all_data = client.query_return_df("SELECT a, b, c, d, 1 as e FROM tab_02473 FORMAT TabSeparatedWithNames;")
|
||||
all_data = client.query_return_df(
|
||||
"SELECT a, b, c, d, 1 as e FROM tab_02473 FORMAT TabSeparatedWithNames;"
|
||||
)
|
||||
|
||||
self.query('OPTIMIZE TABLE tab_02473 FINAL SETTINGS mutations_sync=2;')
|
||||
self.query("OPTIMIZE TABLE tab_02473 FINAL SETTINGS mutations_sync=2;")
|
||||
|
||||
# After all data has been written add a column with default value
|
||||
self.query('ALTER TABLE tab_02473 ADD COLUMN e Int64 DEFAULT 1;')
|
||||
self.query("ALTER TABLE tab_02473 ADD COLUMN e Int64 DEFAULT 1;")
|
||||
|
||||
self.check_data(all_data, c_range_start, c_range_end, d_range_start, d_range_end)
|
||||
|
||||
self.query('DROP TABLE tab_02473;')
|
||||
self.check_data(
|
||||
all_data, c_range_start, c_range_end, d_range_start, d_range_end
|
||||
)
|
||||
|
||||
self.query("DROP TABLE tab_02473;")
|
||||
|
||||
|
||||
def main():
|
||||
# Enable multiple prewhere read steps
|
||||
url = os.environ['CLICKHOUSE_URL'] + '&enable_multiple_prewhere_read_steps=1&move_all_conditions_to_prewhere=0&max_threads=1'
|
||||
url = (
|
||||
os.environ["CLICKHOUSE_URL"]
|
||||
+ "&enable_multiple_prewhere_read_steps=1&move_all_conditions_to_prewhere=0&max_threads=1"
|
||||
)
|
||||
|
||||
default_index_granularity = 10;
|
||||
default_index_granularity = 10
|
||||
total_rows = 8 * default_index_granularity
|
||||
step = default_index_granularity
|
||||
session = requests.Session()
|
||||
for index_granularity in [default_index_granularity-1, default_index_granularity]:
|
||||
for index_granularity in [default_index_granularity - 1, default_index_granularity]:
|
||||
tester = Tester(session, url, index_granularity, total_rows)
|
||||
# Test combinations of ranges of columns c and d
|
||||
for c_range_start in range(0, total_rows, int(2.3 * step)):
|
||||
for c_range_end in range(c_range_start + 3 * step, total_rows, int(2.1 * step)):
|
||||
for d_range_start in range(int(0.5 * step), total_rows, int(2.7 * step)):
|
||||
for d_range_end in range(d_range_start + 3 * step, total_rows, int(2.2 * step)):
|
||||
tester.run_test(c_range_start, c_range_end, d_range_start, d_range_end)
|
||||
for c_range_end in range(
|
||||
c_range_start + 3 * step, total_rows, int(2.1 * step)
|
||||
):
|
||||
for d_range_start in range(
|
||||
int(0.5 * step), total_rows, int(2.7 * step)
|
||||
):
|
||||
for d_range_end in range(
|
||||
d_range_start + 3 * step, total_rows, int(2.2 * step)
|
||||
):
|
||||
tester.run_test(
|
||||
c_range_start, c_range_end, d_range_start, d_range_end
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
|
@ -8,7 +8,7 @@ import time
|
||||
from threading import Thread
|
||||
|
||||
CURDIR = os.path.dirname(os.path.realpath(__file__))
|
||||
sys.path.insert(0, os.path.join(CURDIR, 'helpers'))
|
||||
sys.path.insert(0, os.path.join(CURDIR, "helpers"))
|
||||
|
||||
from pure_http_client import ClickHouseClient
|
||||
|
||||
@ -16,14 +16,23 @@ client = ClickHouseClient()
|
||||
|
||||
# test table without partition
|
||||
client.query("DROP TABLE IF EXISTS t_async_insert_dedup_no_part NO DELAY")
|
||||
client.query('''
|
||||
client.query(
|
||||
"""
|
||||
CREATE TABLE t_async_insert_dedup_no_part (
|
||||
KeyID UInt32
|
||||
) Engine = ReplicatedMergeTree('/clickhouse/tables/{shard}/{database}/t_async_insert_dedup', '{replica}')
|
||||
ORDER BY (KeyID)
|
||||
''')
|
||||
"""
|
||||
)
|
||||
|
||||
client.query("insert into t_async_insert_dedup_no_part values (1), (2), (3), (4), (5)", settings = {"async_insert": 1, "wait_for_async_insert": 1, "insert_keeper_fault_injection_probability": 0})
|
||||
client.query(
|
||||
"insert into t_async_insert_dedup_no_part values (1), (2), (3), (4), (5)",
|
||||
settings={
|
||||
"async_insert": 1,
|
||||
"wait_for_async_insert": 1,
|
||||
"insert_keeper_fault_injection_probability": 0,
|
||||
},
|
||||
)
|
||||
result = client.query("select count(*) from t_async_insert_dedup_no_part")
|
||||
print(result, flush=True)
|
||||
client.query("DROP TABLE IF EXISTS t_async_insert_dedup_no_part NO DELAY")
|
||||
@ -32,13 +41,13 @@ client.query("DROP TABLE IF EXISTS t_async_insert_dedup_no_part NO DELAY")
|
||||
def generate_data(q, total_number):
|
||||
old_data = []
|
||||
max_chunk_size = 30
|
||||
partitions = ['2022-11-11 10:10:10', '2022-12-12 10:10:10']
|
||||
partitions = ["2022-11-11 10:10:10", "2022-12-12 10:10:10"]
|
||||
last_number = 0
|
||||
while True:
|
||||
dup_simulate = random.randint(0,3)
|
||||
dup_simulate = random.randint(0, 3)
|
||||
# Insert old data randomly; about 25% of the inserts are duplicates.
|
||||
if dup_simulate == 0:
|
||||
last_idx = len(old_data)-1
|
||||
last_idx = len(old_data) - 1
|
||||
if last_idx < 0:
|
||||
continue
|
||||
idx = last_idx - random.randint(0, 50)
|
||||
@ -53,7 +62,7 @@ def generate_data(q, total_number):
|
||||
end = start + chunk_size
|
||||
if end > total_number:
|
||||
end = total_number
|
||||
for i in range(start, end+1):
|
||||
for i in range(start, end + 1):
|
||||
partition = partitions[random.randint(0, 1)]
|
||||
insert_stmt += "('{}', {}),".format(partition, i)
|
||||
insert_stmt = insert_stmt[:-1]
|
||||
@ -65,33 +74,46 @@ def generate_data(q, total_number):
|
||||
# wait all the tasks is done.
|
||||
q.join()
|
||||
|
||||
|
||||
def fetch_and_insert_data(q, client):
|
||||
while True:
|
||||
insert = q.get()
|
||||
client.query(insert, settings = {"async_insert": 1, "async_insert_deduplicate": 1, "wait_for_async_insert": 0, "async_insert_busy_timeout_ms": 1500, "insert_keeper_fault_injection_probability": 0})
|
||||
client.query(
|
||||
insert,
|
||||
settings={
|
||||
"async_insert": 1,
|
||||
"async_insert_deduplicate": 1,
|
||||
"wait_for_async_insert": 0,
|
||||
"async_insert_busy_timeout_ms": 1500,
|
||||
"insert_keeper_fault_injection_probability": 0,
|
||||
},
|
||||
)
|
||||
q.task_done()
|
||||
sleep_time = random.randint(50, 500)
|
||||
time.sleep(sleep_time/1000.0)
|
||||
time.sleep(sleep_time / 1000.0)
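generate_data and fetch_and_insert_data implement a bounded producer/consumer pipeline on queue.Queue. A minimal, self-contained sketch of that pattern (illustrative only, not the test's actual data flow):

import queue
import threading

q = queue.Queue(maxsize=100)  # the producer blocks when the queue is full

def producer():
    for i in range(10):
        q.put(f"INSERT ... ({i})")
    q.join()  # wait until every queued item has been marked done

def consumer():
    while True:
        item = q.get()
        # ... send the insert here ...
        q.task_done()

threading.Thread(target=consumer, daemon=True).start()
producer()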
|
||||
|
||||
|
||||
# main process
|
||||
client.query("DROP TABLE IF EXISTS t_async_insert_dedup NO DELAY")
|
||||
client.query('''
|
||||
client.query(
|
||||
"""
|
||||
CREATE TABLE t_async_insert_dedup (
|
||||
EventDate DateTime,
|
||||
KeyID UInt32
|
||||
) Engine = ReplicatedMergeTree('/clickhouse/tables/{shard}/{database}/t_async_insert_dedup', '{replica}')
|
||||
PARTITION BY toYYYYMM(EventDate)
|
||||
ORDER BY (KeyID, EventDate) SETTINGS use_async_block_ids_cache = 1
|
||||
''')
|
||||
"""
|
||||
)
|
||||
|
||||
q = queue.Queue(100)
|
||||
total_number = 10000
|
||||
|
||||
gen = Thread(target = generate_data, args = [q, total_number])
|
||||
gen = Thread(target=generate_data, args=[q, total_number])
|
||||
gen.start()
|
||||
|
||||
for i in range(3):
|
||||
insert = Thread(target = fetch_and_insert_data, args = [q, client])
|
||||
insert = Thread(target=fetch_and_insert_data, args=[q, client])
|
||||
insert.start()
|
||||
|
||||
gen.join()
|
||||
@ -109,7 +131,7 @@ while True:
|
||||
errMsg = f"the size of result is {len(result)}. we expect {total_number}."
|
||||
else:
|
||||
for i in range(total_number):
|
||||
expect = str(i+1)
|
||||
expect = str(i + 1)
|
||||
real = result[i]
|
||||
if expect != real:
|
||||
err = True
|
||||
@ -117,7 +139,7 @@ while True:
|
||||
break
|
||||
# retry several times to get stable results.
|
||||
if err and retry >= 5:
|
||||
print (errMsg, flush=True)
|
||||
print(errMsg, flush=True)
|
||||
elif err:
|
||||
retry += 1
|
||||
continue
|
||||
@ -125,11 +147,15 @@ while True:
|
||||
print(len(result), flush=True)
|
||||
break
|
||||
|
||||
result = client.query("SELECT value FROM system.metrics where metric = 'AsyncInsertCacheSize'")
|
||||
result = client.query(
|
||||
"SELECT value FROM system.metrics where metric = 'AsyncInsertCacheSize'"
|
||||
)
|
||||
result = int(result.split()[0])
|
||||
if result <= 0:
|
||||
raise Exception(f"AsyncInsertCacheSize should > 0, but got {result}")
|
||||
result = client.query("SELECT value FROM system.events where event = 'AsyncInsertCacheHits'")
|
||||
result = client.query(
|
||||
"SELECT value FROM system.events where event = 'AsyncInsertCacheHits'"
|
||||
)
|
||||
result = int(result.split()[0])
|
||||
if result <= 0:
|
||||
raise Exception(f"AsyncInsertCacheHits should > 0, but got {result}")
|
||||
|
@ -19,9 +19,9 @@ import tenacity
|
||||
import xmltodict
|
||||
import yaml
|
||||
|
||||
SELECT_VERSION = r'SELECT version()'
|
||||
SELECT_VERSION = r"SELECT version()"
|
||||
|
||||
SELECT_UPTIME = r'''
|
||||
SELECT_UPTIME = r"""
|
||||
{% if version_ge('21.3') -%}
|
||||
SELECT formatReadableTimeDelta(uptime())
|
||||
{% else -%}
|
||||
@ -29,18 +29,18 @@ SELECT
|
||||
toString(floor(uptime() / 3600 / 24)) || ' days ' ||
|
||||
toString(floor(uptime() % (24 * 3600) / 3600, 1)) || ' hours'
|
||||
{% endif -%}
|
||||
'''
|
||||
"""
|
||||
|
||||
SELECT_SYSTEM_TABLES = "SELECT name FROM system.tables WHERE database = 'system'"
|
||||
|
||||
SELECT_DATABASE_ENGINES = r'''SELECT
|
||||
SELECT_DATABASE_ENGINES = r"""SELECT
|
||||
engine,
|
||||
count() "count"
|
||||
FROM system.databases
|
||||
GROUP BY engine
|
||||
'''
|
||||
"""
|
||||
|
||||
SELECT_DATABASES = r'''SELECT
|
||||
SELECT_DATABASES = r"""SELECT
|
||||
name,
|
||||
engine,
|
||||
tables,
|
||||
@ -62,17 +62,17 @@ LEFT JOIN
|
||||
) AS db_stats ON db.name = db_stats.database
|
||||
ORDER BY bytes_on_disk DESC
|
||||
LIMIT 10
|
||||
'''
|
||||
"""
|
||||
|
||||
SELECT_TABLE_ENGINES = r'''SELECT
|
||||
SELECT_TABLE_ENGINES = r"""SELECT
|
||||
engine,
|
||||
count() "count"
|
||||
FROM system.tables
|
||||
WHERE database != 'system'
|
||||
GROUP BY engine
|
||||
'''
|
||||
"""
|
||||
|
||||
SELECT_DICTIONARIES = r'''SELECT
|
||||
SELECT_DICTIONARIES = r"""SELECT
|
||||
source,
|
||||
type,
|
||||
status,
|
||||
@ -80,13 +80,13 @@ SELECT_DICTIONARIES = r'''SELECT
|
||||
FROM system.dictionaries
|
||||
GROUP BY source, type, status
|
||||
ORDER BY status DESC, source
|
||||
'''
|
||||
"""
|
||||
|
||||
SELECT_ACCESS = "SHOW ACCESS"
|
||||
|
||||
SELECT_QUOTA_USAGE = "SHOW QUOTA"
|
||||
|
||||
SELECT_REPLICAS = r'''SELECT
|
||||
SELECT_REPLICAS = r"""SELECT
|
||||
database,
|
||||
table,
|
||||
is_leader,
|
||||
@ -98,9 +98,9 @@ SELECT_REPLICAS = r'''SELECT
|
||||
FROM system.replicas
|
||||
ORDER BY absolute_delay DESC
|
||||
LIMIT 10
|
||||
'''
|
||||
"""
|
||||
|
||||
SELECT_REPLICATION_QUEUE = r'''SELECT
|
||||
SELECT_REPLICATION_QUEUE = r"""SELECT
|
||||
database,
|
||||
table,
|
||||
replica_name,
|
||||
@ -121,9 +121,9 @@ SELECT_REPLICATION_QUEUE = r'''SELECT
|
||||
FROM system.replication_queue
|
||||
ORDER BY create_time ASC
|
||||
LIMIT 20
|
||||
'''
|
||||
"""
|
||||
|
||||
SELECT_REPLICATED_FETCHES = r'''SELECT
|
||||
SELECT_REPLICATED_FETCHES = r"""SELECT
|
||||
database,
|
||||
table,
|
||||
round(elapsed, 1) "elapsed",
|
||||
@ -140,9 +140,9 @@ SELECT_REPLICATED_FETCHES = r'''SELECT
|
||||
to_detached,
|
||||
thread_id
|
||||
FROM system.replicated_fetches
|
||||
'''
|
||||
"""
|
||||
|
||||
SELECT_PARTS_PER_TABLE = r'''SELECT
|
||||
SELECT_PARTS_PER_TABLE = r"""SELECT
|
||||
database,
|
||||
table,
|
||||
count() "partitions",
|
||||
@ -162,9 +162,9 @@ FROM
|
||||
GROUP BY database, table
|
||||
ORDER BY max_parts_per_partition DESC
|
||||
LIMIT 10
|
||||
'''
|
||||
"""
|
||||
|
||||
SELECT_MERGES = r'''SELECT
|
||||
SELECT_MERGES = r"""SELECT
|
||||
database,
|
||||
table,
|
||||
round(elapsed, 1) "elapsed",
|
||||
@ -187,9 +187,9 @@ SELECT_MERGES = r'''SELECT
|
||||
formatReadableSize(memory_usage) "memory_usage"
|
||||
{% endif -%}
|
||||
FROM system.merges
|
||||
'''
|
||||
"""
|
||||
|
||||
SELECT_MUTATIONS = r'''SELECT
|
||||
SELECT_MUTATIONS = r"""SELECT
|
||||
database,
|
||||
table,
|
||||
mutation_id,
|
||||
@ -206,9 +206,9 @@ SELECT_MUTATIONS = r'''SELECT
|
||||
FROM system.mutations
|
||||
WHERE NOT is_done
|
||||
ORDER BY create_time DESC
|
||||
'''
|
||||
"""
|
||||
|
||||
SELECT_RECENT_DATA_PARTS = r'''SELECT
|
||||
SELECT_RECENT_DATA_PARTS = r"""SELECT
|
||||
database,
|
||||
table,
|
||||
engine,
|
||||
@ -242,9 +242,9 @@ SELECT_RECENT_DATA_PARTS = r'''SELECT
|
||||
FROM system.parts
|
||||
WHERE modification_time > now() - INTERVAL 3 MINUTE
|
||||
ORDER BY modification_time DESC
|
||||
'''
|
||||
"""
|
||||
|
||||
SELECT_DETACHED_DATA_PARTS = r'''SELECT
|
||||
SELECT_DETACHED_DATA_PARTS = r"""SELECT
|
||||
database,
|
||||
table,
|
||||
partition_id,
|
||||
@ -255,9 +255,9 @@ SELECT_DETACHED_DATA_PARTS = r'''SELECT
|
||||
max_block_number,
|
||||
level
|
||||
FROM system.detached_parts
|
||||
'''
|
||||
"""
|
||||
|
||||
SELECT_PROCESSES = r'''SELECT
|
||||
SELECT_PROCESSES = r"""SELECT
|
||||
elapsed,
|
||||
query_id,
|
||||
{% if normalize_queries -%}
|
||||
@ -285,9 +285,9 @@ SELECT_PROCESSES = r'''SELECT
|
||||
{% endif -%}
|
||||
FROM system.processes
|
||||
ORDER BY elapsed DESC
|
||||
'''
|
||||
"""
|
||||
|
||||
SELECT_TOP_QUERIES_BY_DURATION = r'''SELECT
|
||||
SELECT_TOP_QUERIES_BY_DURATION = r"""SELECT
|
||||
type,
|
||||
query_start_time,
|
||||
query_duration_ms,
|
||||
@ -339,9 +339,9 @@ WHERE type != 'QueryStart'
|
||||
AND event_time >= now() - INTERVAL 1 DAY
|
||||
ORDER BY query_duration_ms DESC
|
||||
LIMIT 10
|
||||
'''
|
||||
"""
|
||||
|
||||
SELECT_TOP_QUERIES_BY_MEMORY_USAGE = r'''SELECT
|
||||
SELECT_TOP_QUERIES_BY_MEMORY_USAGE = r"""SELECT
|
||||
type,
|
||||
query_start_time,
|
||||
query_duration_ms,
|
||||
@ -393,9 +393,9 @@ WHERE type != 'QueryStart'
|
||||
AND event_time >= now() - INTERVAL 1 DAY
|
||||
ORDER BY memory_usage DESC
|
||||
LIMIT 10
|
||||
'''
|
||||
"""
|
||||
|
||||
SELECT_FAILED_QUERIES = r'''SELECT
|
||||
SELECT_FAILED_QUERIES = r"""SELECT
|
||||
type,
|
||||
query_start_time,
|
||||
query_duration_ms,
|
||||
@ -448,9 +448,9 @@ WHERE type != 'QueryStart'
|
||||
AND exception != ''
|
||||
ORDER BY query_start_time DESC
|
||||
LIMIT 10
|
||||
'''
|
||||
"""
|
||||
|
||||
SELECT_STACK_TRACES = r'''SELECT
|
||||
SELECT_STACK_TRACES = r"""SELECT
|
||||
'\n' || arrayStringConcat(
|
||||
arrayMap(
|
||||
x,
|
||||
@ -459,9 +459,9 @@ SELECT_STACK_TRACES = r'''SELECT
|
||||
arrayMap(x -> demangle(addressToSymbol(x)), trace)),
|
||||
'\n') AS trace
|
||||
FROM system.stack_trace
|
||||
'''
|
||||
"""
|
||||
|
||||
SELECT_CRASH_LOG = r'''SELECT
|
||||
SELECT_CRASH_LOG = r"""SELECT
|
||||
event_time,
|
||||
signal,
|
||||
thread_id,
|
||||
@ -470,7 +470,7 @@ SELECT_CRASH_LOG = r'''SELECT
|
||||
version
|
||||
FROM system.crash_log
|
||||
ORDER BY event_time DESC
|
||||
'''
|
||||
"""
|
||||
|
||||
|
||||
def retry(exception_types, max_attempts=5, max_interval=5):
|
||||
@ -481,7 +481,8 @@ def retry(exception_types, max_attempts=5, max_interval=5):
|
||||
retry=tenacity.retry_if_exception_type(exception_types),
|
||||
wait=tenacity.wait_random_exponential(multiplier=0.5, max=max_interval),
|
||||
stop=tenacity.stop_after_attempt(max_attempts),
|
||||
reraise=True)
|
||||
reraise=True,
|
||||
)
|
||||
|
||||
|
||||
class ClickhouseError(Exception):
@@ -502,9 +503,9 @@ class ClickhouseClient:
def __init__(self, *, host="localhost", port=8123, user="default", password):
self._session = requests.Session()
if user:
self._session.headers['X-ClickHouse-User'] = user
self._session.headers['X-ClickHouse-Key'] = password
self._url = f'http://{host}:{port}'
self._session.headers["X-ClickHouse-User"] = user
self._session.headers["X-ClickHouse-Key"] = password
self._url = f"http://{host}:{port}"
self._timeout = 60
self._ch_version = None

@@ -516,7 +517,16 @@ class ClickhouseClient:
return self._ch_version

@retry(requests.exceptions.ConnectionError)
def query(self, query, query_args=None, format=None, post_data=None, timeout=None, echo=False, dry_run=False):
def query(
self,
query,
query_args=None,
format=None,
post_data=None,
timeout=None,
echo=False,
dry_run=False,
):
"""
Execute query.
"""
@@ -524,28 +534,30 @@ class ClickhouseClient:
query = self.render_query(query, **query_args)

if format:
query += f' FORMAT {format}'
query += f" FORMAT {format}"

if timeout is None:
timeout = self._timeout

if echo:
print(sqlparse.format(query, reindent=True), '\n')
print(sqlparse.format(query, reindent=True), "\n")

if dry_run:
return None

try:
response = self._session.post(self._url,
params={
'query': query,
},
json=post_data,
timeout=timeout)
response = self._session.post(
self._url,
params={
"query": query,
},
json=post_data,
timeout=timeout,
)

response.raise_for_status()

if format in ('JSON', 'JSONCompact'):
if format in ("JSON", "JSONCompact"):
return response.json()

return response.text.strip()
@@ -555,7 +567,9 @@ class ClickhouseClient:
def render_query(self, query, **kwargs):
env = jinja2.Environment()

env.globals['version_ge'] = lambda version: version_ge(self.clickhouse_version, version)
env.globals["version_ge"] = lambda version: version_ge(
self.clickhouse_version, version
)

template = env.from_string(query)
return template.render(kwargs)
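render_query above keeps the same Jinja2 flow after the reformatting: a version_ge helper is exposed as a template global and the query text is rendered as a template. A standalone sketch of that flow, with an illustrative server version and template:

    import jinja2


    def version_ge(current, required):
        # Simplified stand-in for the tool's version comparison helpers.
        parse = lambda v: [int(x) for x in v.strip().split(".") if x.isnumeric()]
        return parse(current) >= parse(required)


    env = jinja2.Environment()
    # "21.8.3" is an illustrative server version, not taken from the diff.
    env.globals["version_ge"] = lambda version: version_ge("21.8.3", version)

    template = env.from_string(
        "SELECT database, table{% if version_ge('20.8') %}, uuid{% endif %} FROM system.tables"
    )
    print(template.render())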
@@ -578,11 +592,13 @@ class ClickhouseConfig:

@classmethod
def load(cls):
return ClickhouseConfig(cls._load_config('/var/lib/clickhouse/preprocessed_configs/config.xml'))
return ClickhouseConfig(
cls._load_config("/var/lib/clickhouse/preprocessed_configs/config.xml")
)

@staticmethod
def _load_config(config_path):
with open(config_path, 'r') as file:
with open(config_path, "r") as file:
return xmltodict.parse(file.read())

@classmethod
@@ -591,8 +607,8 @@ class ClickhouseConfig:
for key, value in list(config.items()):
if isinstance(value, MutableMapping):
cls._mask_secrets(config[key])
elif key in ('password', 'secret_access_key', 'header', 'identity'):
config[key] = '*****'
elif key in ("password", "secret_access_key", "header", "identity"):
config[key] = "*****"


class DiagnosticsData:
@@ -603,53 +619,53 @@ class DiagnosticsData:
def __init__(self, args):
self.args = args
self.host = args.host
self._sections = [{'section': None, 'data': {}}]
self._sections = [{"section": None, "data": {}}]

def add_string(self, name, value, section=None):
self._section(section)[name] = {
'type': 'string',
'value': value,
"type": "string",
"value": value,
}

def add_xml_document(self, name, document, section=None):
self._section(section)[name] = {
'type': 'xml',
'value': document,
"type": "xml",
"value": document,
}

def add_query(self, name, query, result, section=None):
self._section(section)[name] = {
'type': 'query',
'query': query,
'result': result,
"type": "query",
"query": query,
"result": result,
}

def add_command(self, name, command, result, section=None):
self._section(section)[name] = {
'type': 'command',
'command': command,
'result': result,
"type": "command",
"command": command,
"result": result,
}

def dump(self, format):
if format.startswith('json'):
if format.startswith("json"):
result = self._dump_json()
elif format.startswith('yaml'):
elif format.startswith("yaml"):
result = self._dump_yaml()
else:
result = self._dump_wiki()

if format.endswith('.gz'):
compressor = gzip.GzipFile(mode='wb', fileobj=sys.stdout.buffer)
if format.endswith(".gz"):
compressor = gzip.GzipFile(mode="wb", fileobj=sys.stdout.buffer)
compressor.write(result.encode())
else:
print(result)

def _section(self, name=None):
if self._sections[-1]['section'] != name:
self._sections.append({'section': name, 'data': {}})
if self._sections[-1]["section"] != name:
self._sections.append({"section": name, "data": {}})

return self._sections[-1]['data']
return self._sections[-1]["data"]

def _dump_json(self):
"""
@@ -669,85 +685,85 @@ class DiagnosticsData:
"""

def _write_title(buffer, value):
buffer.write(f'### {value}\n')
buffer.write(f"### {value}\n")

def _write_subtitle(buffer, value):
buffer.write(f'#### {value}\n')
buffer.write(f"#### {value}\n")

def _write_string_item(buffer, name, item):
value = item['value']
if value != '':
value = f'**{value}**'
buffer.write(f'{name}: {value}\n')
value = item["value"]
if value != "":
value = f"**{value}**"
buffer.write(f"{name}: {value}\n")

def _write_xml_item(buffer, section_name, name, item):
if section_name:
buffer.write(f'##### {name}\n')
buffer.write(f"##### {name}\n")
else:
_write_subtitle(buffer, name)

_write_result(buffer, item['value'], format='XML')
_write_result(buffer, item["value"], format="XML")

def _write_query_item(buffer, section_name, name, item):
if section_name:
buffer.write(f'##### {name}\n')
buffer.write(f"##### {name}\n")
else:
_write_subtitle(buffer, name)

_write_query(buffer, item['query'])
_write_result(buffer, item['result'])
_write_query(buffer, item["query"])
_write_result(buffer, item["result"])

def _write_command_item(buffer, section_name, name, item):
if section_name:
buffer.write(f'##### {name}\n')
buffer.write(f"##### {name}\n")
else:
_write_subtitle(buffer, name)

_write_command(buffer, item['command'])
_write_result(buffer, item['result'])
_write_command(buffer, item["command"])
_write_result(buffer, item["result"])

def _write_unknown_item(buffer, section_name, name, item):
if section_name:
buffer.write(f'**{name}**\n')
buffer.write(f"**{name}**\n")
else:
_write_subtitle(buffer, name)

json.dump(item, buffer, indent=2)

def _write_query(buffer, query):
buffer.write('**query**\n')
buffer.write('```sql\n')
buffer.write("**query**\n")
buffer.write("```sql\n")
buffer.write(query)
buffer.write('\n```\n')
buffer.write("\n```\n")

def _write_command(buffer, command):
buffer.write('**command**\n')
buffer.write('```\n')
buffer.write("**command**\n")
buffer.write("```\n")
buffer.write(command)
buffer.write('\n```\n')
buffer.write("\n```\n")

def _write_result(buffer, result, format=None):
buffer.write('**result**\n')
buffer.write(f'```{format}\n' if format else '```\n')
buffer.write("**result**\n")
buffer.write(f"```{format}\n" if format else "```\n")
buffer.write(result)
buffer.write('\n```\n')
buffer.write("\n```\n")

buffer = io.StringIO()

_write_title(buffer, f'Diagnostics data for host {self.host}')
_write_title(buffer, f"Diagnostics data for host {self.host}")
for section in self._sections:
section_name = section['section']
section_name = section["section"]
if section_name:
_write_subtitle(buffer, section_name)

for name, item in section['data'].items():
if item['type'] == 'string':
for name, item in section["data"].items():
if item["type"] == "string":
_write_string_item(buffer, name, item)
elif item['type'] == 'query':
elif item["type"] == "query":
_write_query_item(buffer, section_name, name, item)
elif item['type'] == 'command':
elif item["type"] == "command":
_write_command_item(buffer, section_name, name, item)
elif item['type'] == 'xml':
elif item["type"] == "xml":
_write_xml_item(buffer, section_name, name, item)
else:
_write_unknown_item(buffer, section_name, name, item)
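The DiagnosticsData methods above all funnel into _section, which appends a new section record whenever the section name changes. A minimal sketch of that accumulator pattern, with illustrative values:

    # Items land in the data dict of the most recent section; a new section
    # record is opened whenever the requested section name changes.
    sections = [{"section": None, "data": {}}]


    def _section(name=None):
        if sections[-1]["section"] != name:
            sections.append({"section": name, "data": {}})
        return sections[-1]["data"]


    def add_string(name, value, section=None):
        _section(section)[name] = {"type": "string", "value": value}


    # Illustrative values, not taken from a real server.
    add_string("Version", "21.8.3")
    add_string("Table engines", "MergeTree", section="Schema")
    print(sections)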
@@ -760,126 +776,196 @@ def main():
Program entry point.
"""
args = parse_args()
timestamp = datetime.strftime(datetime.now(), '%Y-%m-%d %H:%M:%S')
client = ClickhouseClient(host=args.host, port=args.port, user=args.user, password=args.password)
timestamp = datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S")
client = ClickhouseClient(
host=args.host, port=args.port, user=args.user, password=args.password
)
ch_config = ClickhouseConfig.load()
version = client.clickhouse_version
system_tables = [row[0] for row in execute_query(client, SELECT_SYSTEM_TABLES, format='JSONCompact')['data']]
system_tables = [
row[0]
for row in execute_query(client, SELECT_SYSTEM_TABLES, format="JSONCompact")[
"data"
]
]

diagnostics = DiagnosticsData(args)
diagnostics.add_string('Version', version)
diagnostics.add_string('Timestamp', timestamp)
diagnostics.add_string('Uptime', execute_query(client, SELECT_UPTIME))
diagnostics.add_string("Version", version)
diagnostics.add_string("Timestamp", timestamp)
diagnostics.add_string("Uptime", execute_query(client, SELECT_UPTIME))

diagnostics.add_xml_document('ClickHouse configuration', ch_config.dump())
diagnostics.add_xml_document("ClickHouse configuration", ch_config.dump())

if version_ge(version, '20.8'):
add_query(diagnostics, 'Access configuration',
client=client,
query=SELECT_ACCESS,
format='TSVRaw')
add_query(diagnostics, 'Quotas',
client=client,
query=SELECT_QUOTA_USAGE,
format='Vertical')
if version_ge(version, "20.8"):
add_query(
diagnostics,
"Access configuration",
client=client,
query=SELECT_ACCESS,
format="TSVRaw",
)
add_query(
diagnostics,
"Quotas",
client=client,
query=SELECT_QUOTA_USAGE,
format="Vertical",
)

add_query(diagnostics, 'Database engines',
client=client,
query=SELECT_DATABASE_ENGINES,
format='PrettyCompactNoEscapes',
section='Schema')
add_query(diagnostics, 'Databases (top 10 by size)',
client=client,
query=SELECT_DATABASES,
format='PrettyCompactNoEscapes',
section='Schema')
add_query(diagnostics, 'Table engines',
client=client,
query=SELECT_TABLE_ENGINES,
format='PrettyCompactNoEscapes',
section='Schema')
add_query(diagnostics, 'Dictionaries',
client=client,
query=SELECT_DICTIONARIES,
format='PrettyCompactNoEscapes',
section='Schema')
add_query(
diagnostics,
"Database engines",
client=client,
query=SELECT_DATABASE_ENGINES,
format="PrettyCompactNoEscapes",
section="Schema",
)
add_query(
diagnostics,
"Databases (top 10 by size)",
client=client,
query=SELECT_DATABASES,
format="PrettyCompactNoEscapes",
section="Schema",
)
add_query(
diagnostics,
"Table engines",
client=client,
query=SELECT_TABLE_ENGINES,
format="PrettyCompactNoEscapes",
section="Schema",
)
add_query(
diagnostics,
"Dictionaries",
client=client,
query=SELECT_DICTIONARIES,
format="PrettyCompactNoEscapes",
section="Schema",
)

add_query(diagnostics, 'Replicated tables (top 10 by absolute delay)',
client=client,
query=SELECT_REPLICAS,
format='PrettyCompactNoEscapes',
section='Replication')
add_query(diagnostics, 'Replication queue (top 20 oldest tasks)',
client=client,
query=SELECT_REPLICATION_QUEUE,
format='Vertical',
section='Replication')
if version_ge(version, '21.3'):
add_query(diagnostics, 'Replicated fetches',
client=client,
query=SELECT_REPLICATED_FETCHES,
format='Vertical',
section='Replication')
add_query(
diagnostics,
"Replicated tables (top 10 by absolute delay)",
client=client,
query=SELECT_REPLICAS,
format="PrettyCompactNoEscapes",
section="Replication",
)
add_query(
diagnostics,
"Replication queue (top 20 oldest tasks)",
client=client,
query=SELECT_REPLICATION_QUEUE,
format="Vertical",
section="Replication",
)
if version_ge(version, "21.3"):
add_query(
diagnostics,
"Replicated fetches",
client=client,
query=SELECT_REPLICATED_FETCHES,
format="Vertical",
section="Replication",
)

add_query(diagnostics, 'Top 10 tables by max parts per partition',
client=client,
query=SELECT_PARTS_PER_TABLE,
format='PrettyCompactNoEscapes')
add_query(diagnostics, 'Merges in progress',
client=client,
query=SELECT_MERGES,
format='Vertical')
add_query(diagnostics, 'Mutations in progress',
client=client,
query=SELECT_MUTATIONS,
format='Vertical')
add_query(diagnostics, 'Recent data parts (modification time within last 3 minutes)',
client=client,
query=SELECT_RECENT_DATA_PARTS,
format='Vertical')
add_query(
diagnostics,
"Top 10 tables by max parts per partition",
client=client,
query=SELECT_PARTS_PER_TABLE,
format="PrettyCompactNoEscapes",
)
add_query(
diagnostics,
"Merges in progress",
client=client,
query=SELECT_MERGES,
format="Vertical",
)
add_query(
diagnostics,
"Mutations in progress",
client=client,
query=SELECT_MUTATIONS,
format="Vertical",
)
add_query(
diagnostics,
"Recent data parts (modification time within last 3 minutes)",
client=client,
query=SELECT_RECENT_DATA_PARTS,
format="Vertical",
)

add_query(diagnostics, 'system.detached_parts',
client=client,
query=SELECT_DETACHED_DATA_PARTS,
format='PrettyCompactNoEscapes',
section='Detached data')
add_command(diagnostics, 'Disk space usage',
command='du -sh -L -c /var/lib/clickhouse/data/*/*/detached/* | sort -rsh',
section='Detached data')
add_query(
diagnostics,
"system.detached_parts",
client=client,
query=SELECT_DETACHED_DATA_PARTS,
format="PrettyCompactNoEscapes",
section="Detached data",
)
add_command(
diagnostics,
"Disk space usage",
command="du -sh -L -c /var/lib/clickhouse/data/*/*/detached/* | sort -rsh",
section="Detached data",
)

add_query(diagnostics, 'Queries in progress (process list)',
client=client,
query=SELECT_PROCESSES,
format='Vertical',
section='Queries')
add_query(diagnostics, 'Top 10 queries by duration',
client=client,
query=SELECT_TOP_QUERIES_BY_DURATION,
format='Vertical',
section='Queries')
add_query(diagnostics, 'Top 10 queries by memory usage',
client=client,
query=SELECT_TOP_QUERIES_BY_MEMORY_USAGE,
format='Vertical',
section='Queries')
add_query(diagnostics, 'Last 10 failed queries',
client=client,
query=SELECT_FAILED_QUERIES,
format='Vertical',
section='Queries')
add_query(
diagnostics,
"Queries in progress (process list)",
client=client,
query=SELECT_PROCESSES,
format="Vertical",
section="Queries",
)
add_query(
diagnostics,
"Top 10 queries by duration",
client=client,
query=SELECT_TOP_QUERIES_BY_DURATION,
format="Vertical",
section="Queries",
)
add_query(
diagnostics,
"Top 10 queries by memory usage",
client=client,
query=SELECT_TOP_QUERIES_BY_MEMORY_USAGE,
format="Vertical",
section="Queries",
)
add_query(
diagnostics,
"Last 10 failed queries",
client=client,
query=SELECT_FAILED_QUERIES,
format="Vertical",
section="Queries",
)

add_query(diagnostics, 'Stack traces',
client=client,
query=SELECT_STACK_TRACES,
format='Vertical')
add_query(
diagnostics,
"Stack traces",
client=client,
query=SELECT_STACK_TRACES,
format="Vertical",
)

if 'crash_log' in system_tables:
add_query(diagnostics, 'Crash log',
client=client,
query=SELECT_CRASH_LOG,
format='Vertical')
if "crash_log" in system_tables:
add_query(
diagnostics,
"Crash log",
client=client,
query=SELECT_CRASH_LOG,
format="Vertical",
)

add_command(diagnostics, 'uname', 'uname -a')
add_command(diagnostics, "uname", "uname -a")

diagnostics.dump(args.format)

@@ -889,29 +975,34 @@ def parse_args():
Parse command-line arguments.
"""
parser = argparse.ArgumentParser()
parser.add_argument('--format',
choices=['json', 'yaml', 'json.gz', 'yaml.gz', 'wiki', 'wiki.gz'],
default='wiki')
parser.add_argument('--normalize-queries',
action='store_true',
default=False)
parser.add_argument('--host', dest="host", help="clickhouse host")
parser.add_argument('--port', dest="port", default=8123, help="clickhouse http port")
parser.add_argument('--user', dest="user", default="default", help="clickhouse user")
parser.add_argument('--password', dest="password", help="clickhouse password")
parser.add_argument(
"--format",
choices=["json", "yaml", "json.gz", "yaml.gz", "wiki", "wiki.gz"],
default="wiki",
)
parser.add_argument("--normalize-queries", action="store_true", default=False)
parser.add_argument("--host", dest="host", help="clickhouse host")
parser.add_argument(
"--port", dest="port", default=8123, help="clickhouse http port"
)
parser.add_argument(
"--user", dest="user", default="default", help="clickhouse user"
)
parser.add_argument("--password", dest="password", help="clickhouse password")
return parser.parse_args()


def add_query(diagnostics, name, client, query, format, section=None):
query_args = {
'normalize_queries': diagnostics.args.normalize_queries,
"normalize_queries": diagnostics.args.normalize_queries,
}
query = client.render_query(query, **query_args)
diagnostics.add_query(
name=name,
query=query,
result=execute_query(client, query, render_query=False, format=format),
section=section)
section=section,
)


def execute_query(client, query, render_query=True, format=None):
@@ -926,14 +1017,18 @@ def execute_query(client, query, render_query=True, format=None):

def add_command(diagnostics, name, command, section=None):
diagnostics.add_command(
name=name,
command=command,
result=execute_command(command),
section=section)
name=name, command=command, result=execute_command(command), section=section
)


def execute_command(command, input=None):
proc = subprocess.Popen(command, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
proc = subprocess.Popen(
command,
shell=True,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)

if isinstance(input, str):
input = input.encode()
@@ -941,7 +1036,7 @@ def execute_command(command, input=None):
stdout, stderr = proc.communicate(input=input)

if proc.returncode:
return f'failed with exit code {proc.returncode}\n{stderr.decode()}'
return f"failed with exit code {proc.returncode}\n{stderr.decode()}"

return stdout.decode()

@@ -957,8 +1052,8 @@ def parse_version(version):
"""
Parse version string.
"""
return [int(x) for x in version.strip().split('.') if x.isnumeric()]
return [int(x) for x in version.strip().split(".") if x.isnumeric()]


if __name__ == '__main__':
if __name__ == "__main__":
main()

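For reference, the execute_command helper from the hunk above reads as follows once the reformatted lines are put back together; the trailing call mirrors the add_command(diagnostics, "uname", "uname -a") usage:

    import subprocess


    def execute_command(command, input=None):
        # Run the command through the shell, feed optional stdin, and fold a
        # non-zero exit code into the returned text instead of raising.
        proc = subprocess.Popen(
            command,
            shell=True,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        if isinstance(input, str):
            input = input.encode()
        stdout, stderr = proc.communicate(input=input)
        if proc.returncode:
            return f"failed with exit code {proc.returncode}\n{stderr.decode()}"
        return stdout.decode()


    print(execute_command("uname -a"))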
@@ -28,39 +28,48 @@ class S3API(object):
bucket = self.connection.get_bucket(bucket_name)
key = bucket.initiate_multipart_upload(s3_path)
logging.info("Will upload to s3 path %s", s3_path)
chunksize = 1024 * 1024 * 1024 # 1 GB
chunksize = 1024 * 1024 * 1024  # 1 GB
filesize = os.stat(file_path).st_size
logging.info("File size is %s", filesize)
chunkcount = int(math.ceil(filesize / chunksize))

def call_back(x, y):
print("Uploaded {}/{} bytes".format(x, y))

try:
for i in range(chunkcount + 1):
logging.info("Uploading chunk %s of %s", i, chunkcount + 1)
offset = chunksize * i
bytes_size = min(chunksize, filesize - offset)

with open(file_path, 'r') as fp:
with open(file_path, "r") as fp:
fp.seek(offset)
key.upload_part_from_file(fp=fp, part_num=i+1,
size=bytes_size, cb=call_back,
num_cb=100)
key.upload_part_from_file(
fp=fp, part_num=i + 1, size=bytes_size, cb=call_back, num_cb=100
)
key.complete_upload()
except Exception as ex:
key.cancel_upload()
raise ex
logging.info("Contents were set")
return "https://{bucket}.{mds_url}/{path}".format(
bucket=bucket_name, mds_url=self.mds_url, path=s3_path)
bucket=bucket_name, mds_url=self.mds_url, path=s3_path
)

def set_file_contents(self, bucket, local_file_path, s3_file_path):
key = Key(bucket)
key.key = s3_file_path
file_size = os.stat(local_file_path).st_size
logging.info("Uploading file `%s` to `%s`. Size is %s", local_file_path, s3_file_path, file_size)
logging.info(
"Uploading file `%s` to `%s`. Size is %s",
local_file_path,
s3_file_path,
file_size,
)

def call_back(x, y):
print("Uploaded {}/{} bytes".format(x, y))

key.set_contents_from_filename(local_file_path, cb=call_back)

def upload_data_for_static_files_disk(self, bucket_name, directory_path, s3_path):
@@ -74,12 +83,14 @@ class S3API(object):
path = root.split(os.sep)
for file in files:
local_file_path = os.path.join(root, file)
s3_file = local_file_path[len(directory_path) + 1:]
s3_file = local_file_path[len(directory_path) + 1 :]
s3_file_path = os.path.join(s3_path, s3_file)
self.set_file_contents(bucket, local_file_path, s3_file_path)

logging.info("Uploading finished")
return "https://{bucket}.{mds_url}/{path}".format(bucket=bucket_name, mds_url=self.mds_url, path=s3_path)
return "https://{bucket}.{mds_url}/{path}".format(
bucket=bucket_name, mds_url=self.mds_url, path=s3_path
)

def list_bucket_keys(self, bucket_name):
bucket = self.connection.get_bucket(bucket_name)
@@ -91,100 +102,121 @@ class S3API(object):
bucket.get_all_keys()
for obj in bucket.get_all_keys():
if obj.key.startswith(folder_path):
print('Removing ' + obj.key)
print("Removing " + obj.key)
obj.delete()


def make_tar_file_for_table(clickhouse_data_path, db_name, table_name,
tmp_prefix):
def make_tar_file_for_table(clickhouse_data_path, db_name, table_name, tmp_prefix):

relative_data_path = os.path.join('data', db_name, table_name)
relative_meta_path = os.path.join('metadata', db_name, table_name + '.sql')
relative_data_path = os.path.join("data", db_name, table_name)
relative_meta_path = os.path.join("metadata", db_name, table_name + ".sql")
path_to_data = os.path.join(clickhouse_data_path, relative_data_path)
path_to_metadata = os.path.join(clickhouse_data_path, relative_meta_path)
temporary_file_name = tmp_prefix + '/{tname}.tar'.format(tname=table_name)
temporary_file_name = tmp_prefix + "/{tname}.tar".format(tname=table_name)
with tarfile.open(temporary_file_name, "w") as bundle:
bundle.add(path_to_data, arcname=relative_data_path)
bundle.add(path_to_metadata, arcname=relative_meta_path)
return temporary_file_name


USAGE_EXAMPLES = '''
USAGE_EXAMPLES = """
examples:
\t./s3uploader --dataset-name some_ds --access-key-id XXX --secret-access-key YYY --clickhouse-data-path /opt/clickhouse/ --table-name default.some_tbl --bucket-name some-bucket
\t./s3uploader --dataset-name some_ds --access-key-id XXX --secret-access-key YYY --file-path some_ds.tsv.xz --bucket-name some-bucket --s3-path /path/to/
'''
"""

if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")

parser = argparse.ArgumentParser(
description="Simple tool for uploading datasets to clickhouse S3",
usage='%(prog)s [options] {}'.format(USAGE_EXAMPLES))
parser.add_argument('--s3-api-url', default='s3.amazonaws.com')
parser.add_argument('--s3-common-url', default='s3.amazonaws.com')
parser.add_argument('--bucket-name', default='clickhouse-datasets')
parser.add_argument('--dataset-name', required=True,
help='Name of dataset, will be used in uploaded path')
parser.add_argument('--access-key-id', required=True)
parser.add_argument('--secret-access-key', required=True)
parser.add_argument('--clickhouse-data-path',
default='/var/lib/clickhouse/',
help='Path to clickhouse database on filesystem')
parser.add_argument('--s3-path', help='Path in s3, where to upload file')
parser.add_argument('--tmp-prefix', default='/tmp',
help='Prefix to store temporary downloaded file')
usage="%(prog)s [options] {}".format(USAGE_EXAMPLES),
)
parser.add_argument("--s3-api-url", default="s3.amazonaws.com")
parser.add_argument("--s3-common-url", default="s3.amazonaws.com")
parser.add_argument("--bucket-name", default="clickhouse-datasets")
parser.add_argument(
"--dataset-name",
required=True,
help="Name of dataset, will be used in uploaded path",
)
parser.add_argument("--access-key-id", required=True)
parser.add_argument("--secret-access-key", required=True)
parser.add_argument(
"--clickhouse-data-path",
default="/var/lib/clickhouse/",
help="Path to clickhouse database on filesystem",
)
parser.add_argument("--s3-path", help="Path in s3, where to upload file")
parser.add_argument(
"--tmp-prefix", default="/tmp", help="Prefix to store temporary downloaded file"
)
data_group = parser.add_mutually_exclusive_group(required=True)
table_name_argument = data_group.add_argument('--table-name',
help='Name of table with database, if you are uploading partitions')
data_group.add_argument('--file-path',
help='Name of file, if you are uploading')
data_group.add_argument('--directory-path', help='Path to directory with files to upload')
data_group.add_argument('--list-directory', help='List s3 directory by --directory-path')
data_group.add_argument('--remove-directory', help='Remove s3 directory by --directory-path')
table_name_argument = data_group.add_argument(
"--table-name",
help="Name of table with database, if you are uploading partitions",
)
data_group.add_argument("--file-path", help="Name of file, if you are uploading")
data_group.add_argument(
"--directory-path", help="Path to directory with files to upload"
)
data_group.add_argument(
"--list-directory", help="List s3 directory by --directory-path"
)
data_group.add_argument(
"--remove-directory", help="Remove s3 directory by --directory-path"
)
args = parser.parse_args()

if args.table_name is not None and args.clickhouse_data_path is None:
raise argparse.ArgumentError(table_name_argument,
"You should specify --clickhouse-data-path to upload --table")
raise argparse.ArgumentError(
table_name_argument,
"You should specify --clickhouse-data-path to upload --table",
)

s3_conn = S3API(
args.access_key_id, args.secret_access_key,
args.s3_api_url, args.s3_common_url)
args.access_key_id, args.secret_access_key, args.s3_api_url, args.s3_common_url
)

file_path = ''
file_path = ""
directory_path = args.directory_path
s3_path = args.s3_path

if args.list_directory:
s3_conn.list_bucket_keys(args.bucket_name)
elif args.remove_directory:
print('Removing s3 path: ' + args.remove_directory)
print("Removing s3 path: " + args.remove_directory)
s3_conn.remove_folder_from_bucket(args.bucket_name, args.remove_directory)
elif args.directory_path is not None:
url = s3_conn.upload_data_for_static_files_disk(args.bucket_name, directory_path, s3_path)
url = s3_conn.upload_data_for_static_files_disk(
args.bucket_name, directory_path, s3_path
)
logging.info("Data uploaded: %s", url)
else:

if args.table_name is not None:
if '.' not in args.table_name:
db_name = 'default'
if "." not in args.table_name:
db_name = "default"
else:
db_name, table_name = args.table_name.split('.')
db_name, table_name = args.table_name.split(".")
file_path = make_tar_file_for_table(
args.clickhouse_data_path, db_name, table_name, args.tmp_prefix)
args.clickhouse_data_path, db_name, table_name, args.tmp_prefix
)
else:
file_path = args.file_path

if 'tsv' in file_path:
if "tsv" in file_path:
s3_path = os.path.join(
args.dataset_name, 'tsv', os.path.basename(file_path))
args.dataset_name, "tsv", os.path.basename(file_path)
)
if args.table_name is not None:
s3_path = os.path.join(
args.dataset_name, 'partitions', os.path.basename(file_path))
args.dataset_name, "partitions", os.path.basename(file_path)
)
elif args.s3_path is not None:
s3_path = os.path.join(
args.dataset_name, args.s3_path, os.path.basename(file_path))
args.dataset_name, args.s3_path, os.path.basename(file_path)
)
else:
raise Exception("Don't know s3-path to upload")

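The multipart upload loop above splits the file into 1 GB parts. A small sketch of the offset/size arithmetic it relies on (the file size here is illustrative, and a guard is added for the trailing iteration):

    import math

    chunksize = 1024 * 1024 * 1024  # 1 GB, as in the uploader
    filesize = int(2.5 * chunksize)  # illustrative file size
    chunkcount = int(math.ceil(filesize / chunksize))

    for i in range(chunkcount + 1):
        offset = chunksize * i
        bytes_size = min(chunksize, filesize - offset)
        if bytes_size <= 0:
            break  # nothing left to read for the extra trailing iteration
        print(f"part {i + 1}: offset={offset}, size={bytes_size}")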
@@ -11,13 +11,14 @@ from termcolor import colored
import sys

COLORMAP = {
"success": colored("success", 'green'),
"failure": colored("failure", 'red'),
"error": colored("error", 'red'),
"pending": colored("pending", 'yellow'),
"not run": colored("not run", 'white'),
"success": colored("success", "green"),
"failure": colored("failure", "red"),
"error": colored("error", "red"),
"pending": colored("pending", "yellow"),
"not run": colored("not run", "white"),
}


def _filter_statuses(statuses):
"""
Squash statuses to latest state
@@ -69,7 +70,7 @@ if __name__ == "__main__":
date_since = datetime.datetime.strptime(args.since, "%Y-%m-%d %H:%M:%S")

gh = Github(args.token)
repo = gh.get_repo('ClickHouse/ClickHouse')
repo = gh.get_repo("ClickHouse/ClickHouse")
commits = get_commits(repo, date_since)

longest_header = []
@@ -101,6 +102,6 @@ if __name__ == "__main__":
result_data.append(current_result)

if sys.stdout.isatty():
longest_header = [colored(h, 'white', attrs=['bold']) for h in longest_header]
longest_header = [colored(h, "white", attrs=["bold"]) for h in longest_header]

print(tabulate.tabulate(result_data, headers=longest_header, tablefmt="grid"))
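Every hunk in this commit is a mechanical black reformatting, so the result can be re-checked locally. A small sketch, assuming the black package is installed (the target path is illustrative):

    import subprocess
    import sys

    # Exit code 0 means the tree already satisfies black; --diff prints what
    # would change otherwise.
    result = subprocess.run(
        [sys.executable, "-m", "black", "--check", "--diff", "utils/"],
        capture_output=True,
        text=True,
    )
    print(result.stdout)
    print("clean" if result.returncode == 0 else "needs reformatting")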