ClickHouse/tests/queries/0_stateless/02473_multistep_split_prewhere.python
2024-09-27 10:19:49 +00:00

186 lines
6.9 KiB
Python

#!/usr/bin/env python3
import os
import sys
import requests
CURDIR = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(0, os.path.join(CURDIR, "helpers"))
from pure_http_client import ClickHouseClient, requests_session_with_retries
class Tester:
    """
    - Creates test table with multiple integer columns
    - Runs read queries with multiple range conditions on different columns in PREWHERE and check that the result is correct
    """

    def __init__(self, session, url, index_granularity, total_rows):
        """Store the connection parameters and the table shape for later runs.

        session: object whose ``post(url, data=...)`` sends a query.
        url: endpoint every query is posted to.
        index_granularity: value substituted into the table SETTINGS clause.
        total_rows: number of rows inserted into the test table.
        """
        self.session = session
        self.url = url
        self.index_granularity = index_granularity
        self.total_rows = total_rows
        # 40-character prefixes of error responses whose details were printed.
        self.reported_errors = set()
        # Queries needed to reproduce the current scenario, in execution order.
        self.repro_queries = []

    def report_error(self):
        """Print the collected repro queries and terminate with exit status 1."""
        print("Repro steps:", "\n\n\t".join(self.repro_queries))
        # sys.exit() rather than the exit() helper, which only exists when the
        # `site` module has been loaded.
        sys.exit(1)

    def query(self, query_text, include_in_repro_steps=True, expected_data=None):
        """Post one query and optionally validate the returned row count.

        query_text: SQL text sent as the request body.
        include_in_repro_steps: when false, the query is dropped from the
            repro list again once it has succeeded.
        expected_data: when not None, the response body is parsed as an
            integer and compared with ``len(expected_data)``.

        Any non-200 response or count mismatch ends in ``report_error()``,
        which raises SystemExit. ``int()`` raises ValueError if a successful
        response body is not an integer while ``expected_data`` is given.
        """
        # Recorded up front so that a failing query shows up in the repro steps.
        self.repro_queries.append(query_text)
        resp = self.session.post(self.url, data=query_text)
        if resp.status_code != 200:
            # Group similar errors: details are printed once per error prefix.
            error = resp.text[0:40]
            if error not in self.reported_errors:
                self.reported_errors.add(error)
                print("Code:", resp.status_code)
                print("Result:", resp.text)
            # Always abort on a failed response; falling through would either
            # ignore the failure or try to parse the error text as a count.
            self.report_error()

        result = resp.text
        # Check that the result is as expected
        if expected_data is not None and int(result) != len(expected_data):
            print("Expected {} rows, got {}".format(len(expected_data), result))
            print("Expected data:" + str(expected_data))
            self.report_error()

        if not include_in_repro_steps:
            self.repro_queries.pop()

    def check_data(
        self, all_data, c_range_start, c_range_end, d_range_start, d_range_end
    ):
        """Compare PREWHERE query results with counts computed from ``all_data``.

        all_data: reference rows with columns a, b, c, d, indexed with
            boolean masks (presumably a pandas DataFrame; run_test passes the
            result of ``query_return_df``).
        c_range_start, c_range_end: half-open range ``(start, end]`` for c.
        d_range_start, d_range_end: half-open range ``(start, end]`` for d.

        For each aggregate and each lower bound of ``b`` the PREWHERE clause
        is extended one column at a time (b, then b and c, then b, c and d),
        and every intermediate query is checked.
        """
        delta = 10
        # Test reading with and without column with default value
        for to_select in ("count()", "sum(e)"):
            self.query("SELECT {} FROM tab_02473;".format(to_select), False, all_data)

            b_range_end = self.total_rows - delta
            for b_range_start in (0, delta):
                column_ranges = (
                    ("b", b_range_start, b_range_end),
                    ("c", c_range_start, c_range_end),
                    ("d", d_range_start, d_range_end),
                )
                # The mask and the condition list grow together, so each
                # query is checked against exactly the rows it should match.
                mask = all_data.a == 0
                conditions = []
                for column, range_start, range_end in column_ranges:
                    values = all_data[column]
                    mask = mask & (values > range_start) & (values <= range_end)
                    conditions.append(
                        "{0} > {1} AND {0} <= {2}".format(
                            column, range_start, range_end
                        )
                    )
                    self.query(
                        "SELECT {} from tab_02473 PREWHERE {} WHERE a == 0;".format(
                            to_select, " AND ".join(conditions)
                        ),
                        False,
                        all_data[mask],
                    )

    def run_test(self, c_range_start, c_range_end, d_range_start, d_range_end):
        """Create the table, fill it, verify the reads and drop the table.

        The four arguments are forwarded unchanged to ``check_data``. The
        repro list is reset first, so it only describes this scenario.
        """
        self.repro_queries = []

        self.query(
            (
                "\n"
                "CREATE TABLE tab_02473 (a Int8, b Int32, c Int32, d Int32, PRIMARY KEY (a))\n"
                "ENGINE = MergeTree() ORDER BY (a, b)\n"
                "SETTINGS min_bytes_for_wide_part = 0, index_granularity = {};"
            ).format(self.index_granularity)
        )

        self.query(
            "INSERT INTO tab_02473 select 0, number+1, number+1, number+1 FROM numbers({});".format(
                self.total_rows
            )
        )

        # Reference data is read before column e exists, so e is selected as
        # the literal 1, matching the DEFAULT of the column added below.
        client = ClickHouseClient()
        all_data = client.query_return_df(
            "SELECT a, b, c, d, 1 as e FROM tab_02473 FORMAT TabSeparatedWithNames;"
        )

        self.query("OPTIMIZE TABLE tab_02473 FINAL SETTINGS mutations_sync=2;")

        # After all data has been written add a column with default value
        self.query("ALTER TABLE tab_02473 ADD COLUMN e Int64 DEFAULT 1;")

        self.check_data(
            all_data, c_range_start, c_range_end, d_range_start, d_range_end
        )

        self.query("DROP TABLE tab_02473;")
def main():
    """Run the PREWHERE range checks for two index granularities.

    The server URL is read from the CLICKHOUSE_URL environment variable and
    extended with the settings that enable multiple prewhere read steps.
    Every combination of c and d ranges is executed once per granularity
    through ``Tester.run_test``.
    """
    # Enable multiple prewhere read steps
    extra_settings = "&enable_multiple_prewhere_read_steps=1&move_all_conditions_to_prewhere=0&max_threads=1"
    url = os.environ["CLICKHOUSE_URL"] + extra_settings

    base_granularity = 10
    step = base_granularity
    total_rows = 8 * base_granularity

    # Combinations of ranges of columns c and d. They do not depend on the
    # index granularity, so they are computed once and reused.
    range_combinations = [
        (c_start, c_end, d_start, d_end)
        for c_start in range(0, total_rows, int(2.3 * step))
        for c_end in range(c_start + 3 * step, total_rows, int(2.1 * step))
        for d_start in range(int(0.5 * step), total_rows, int(2.7 * step))
        for d_end in range(d_start + 3 * step, total_rows, int(2.2 * step))
    ]

    session = requests_session_with_retries()
    for index_granularity in (base_granularity - 1, base_granularity):
        tester = Tester(session, url, index_granularity, total_rows)
        for combination in range_combinations:
            tester.run_test(*combination)
# Run the test scenario only when this file is executed as a script.
if __name__ == "__main__":
    main()