2022-10-27 21:20:22 +00:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
import os
|
2022-12-25 14:44:31 +00:00
|
|
|
import sys
|
|
|
|
|
2024-09-27 10:19:39 +00:00
|
|
|
import requests
|
|
|
|
|
2022-12-25 14:44:31 +00:00
|
|
|
# Absolute directory containing this script (symlinks resolved by realpath).
CURDIR = os.path.dirname(os.path.realpath(__file__))
# Put the sibling "helpers" directory first on the module search path.
# NOTE(review): presumably this is where pure_http_client lives - confirm.
sys.path.insert(0, os.path.join(CURDIR, "helpers"))
|
2022-12-25 14:44:31 +00:00
|
|
|
|
2024-08-02 14:05:39 +00:00
|
|
|
from pure_http_client import ClickHouseClient, requests_session_with_retries
|
2022-10-27 21:20:22 +00:00
|
|
|
|
|
|
|
|
|
|
|
class Tester:
    """
    Runs one table scenario per call to run_test():

    - Creates test table
    - Deletes the specified range of rows
    - Masks another range using row-level policy
    - Runs some read queries and checks that the results match the row
      counts computed locally from a snapshot of the table contents

    Any failed query or mismatching result prints the queries needed to
    reproduce the problem and terminates the process with exit status 1.
    """

    def __init__(self, session, url, index_granularity, total_rows):
        """
        session: object with a requests-like post(url, data=...) method.
        url: endpoint every query text is posted to.
        index_granularity: value substituted into the table's
            index_granularity setting.
        total_rows: number of rows inserted into the test table.
        """
        self.session = session
        self.url = url
        self.index_granularity = index_granularity
        self.total_rows = total_rows
        # Prefixes of error texts that have already been printed.
        self.reported_errors = set()
        # Queries executed in the current run, printed on failure.
        self.repro_queries = []

    def report_error(self):
        """Print the recorded repro queries and exit with status 1."""
        print("Repro steps:", "\n\n\t".join(self.repro_queries))
        # sys.exit() rather than the builtin exit(): the latter is installed
        # by the site module and is not available under `python -S`.
        sys.exit(1)

    def query(self, query_text, include_in_repro_steps=True, expected_data=None):
        """
        Post query_text to the server and optionally validate the response.

        include_in_repro_steps: when False, the query is removed from the
            repro list again once it has succeeded (it stays listed while it
            runs, so a failure still shows it).
        expected_data: when not None, the response body is parsed as an
            integer and must equal len(expected_data).

        Exits the process through report_error() on a newly seen error
        response or on a result mismatch.
        """
        self.repro_queries.append(query_text)
        resp = self.session.post(self.url, data=query_text)
        if resp.status_code != 200:
            # Group similar errors
            error = resp.text[0:40]
            if error not in self.reported_errors:
                self.reported_errors.add(error)
                print("Code:", resp.status_code)
                print("Result:", resp.text)
                self.report_error()

        result = resp.text
        # Check that the result is as expected
        if expected_data is not None and int(result) != len(expected_data):
            print("Expected {} rows, got {}".format(len(expected_data), result))
            print("Expected data:" + str(expected_data))
            self.report_error()

        if not include_in_repro_steps:
            self.repro_queries.pop()

    def check_data(
        self,
        all_data,
        delete_range_start,
        delete_range_end,
        row_level_policy_range_start,
        row_level_policy_range_end,
    ):
        """
        Compare server-side results with expectations derived from all_data.

        all_data is the table snapshot (columns a, b, c, d) taken before any
        delete or policy was applied. Rows with a == 0 and
        delete_range_start < b <= delete_range_end are treated as deleted;
        of the remainder only rows with b <= row_level_policy_range_start or
        b > row_level_policy_range_end are treated as visible.

        Every check compares the single number returned by the server with
        the number of expected rows, so "sum(d)" is only a valid check
        because d is 1 for every row.
        """
        all_data_after_delete = all_data[
            ~(
                (all_data.a == 0)
                & (all_data.b > delete_range_start)
                & (all_data.b <= delete_range_end)
            )
        ]
        all_data_after_row_policy = all_data_after_delete[
            (all_data_after_delete.b <= row_level_policy_range_start)
            | (all_data_after_delete.b > row_level_policy_range_end)
        ]

        # Margin cut off from both ends of the queried range.
        delta = 10

        for to_select in [
            "count()",
            "sum(d)",
        ]:  # Test reading with and without column with default value
            self.query(
                "SELECT {} FROM tab_02473;".format(to_select),
                False,
                all_data_after_row_policy,
            )

            for query_range_start in [0, delta]:
                for query_range_end in [self.total_rows - delta]:
                    # Range condition on b in PREWHERE.
                    expected = all_data_after_row_policy[
                        (all_data_after_row_policy.a == 0)
                        & (all_data_after_row_policy.b > query_range_start)
                        & (all_data_after_row_policy.b <= query_range_end)
                    ]
                    self.query(
                        "SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} WHERE a == 0;".format(
                            to_select, query_range_start, query_range_end
                        ),
                        False,
                        expected,
                    )

                    # Same range condition, but on column c.
                    expected = all_data_after_row_policy[
                        (all_data_after_row_policy.a == 0)
                        & (all_data_after_row_policy.c > query_range_start)
                        & (all_data_after_row_policy.c <= query_range_end)
                    ]
                    self.query(
                        "SELECT {} from tab_02473 PREWHERE c > {} AND c <= {} WHERE a == 0;".format(
                            to_select, query_range_start, query_range_end
                        ),
                        False,
                        expected,
                    )

                    # Complement of the range on c (everything outside it).
                    expected = all_data_after_row_policy[
                        (all_data_after_row_policy.a == 0)
                        & (
                            (all_data_after_row_policy.c <= query_range_start)
                            | (all_data_after_row_policy.c > query_range_end)
                        )
                    ]
                    self.query(
                        "SELECT {} from tab_02473 PREWHERE c <= {} OR c > {} WHERE a == 0;".format(
                            to_select, query_range_start, query_range_end
                        ),
                        False,
                        expected,
                    )

    def run_test(
        self,
        delete_range_start,
        delete_range_end,
        row_level_policy_range_start,
        row_level_policy_range_end,
    ):
        """
        Run the full scenario for one pair of delete / row-policy ranges.

        Creates and fills the table, snapshots its contents, then verifies
        the read queries three times: on the untouched table, after the
        DELETE, and after the row policy has been added. The policy and the
        table are dropped at the end.
        """
        self.repro_queries = []

        self.query(
            """
            CREATE TABLE tab_02473 (a Int8, b Int32, c Int32, PRIMARY KEY (a))
            ENGINE = MergeTree() ORDER BY (a, b)
            SETTINGS min_bytes_for_wide_part = 0, index_granularity = {};""".format(
                self.index_granularity
            )
        )

        self.query(
            "INSERT INTO tab_02473 select 0, number+1, number+1 FROM numbers({});".format(
                self.total_rows
            )
        )

        # Snapshot of the table contents used to compute expected results.
        # Fetched through a separate ClickHouseClient, not self.session.
        client = ClickHouseClient()
        all_data = client.query_return_df(
            "SELECT a, b, c, 1 as d FROM tab_02473 FORMAT TabSeparatedWithNames;"
        )

        self.query("OPTIMIZE TABLE tab_02473 FINAL SETTINGS mutations_sync=2;")

        # After all data has been written add a column with default value
        self.query("ALTER TABLE tab_02473 ADD COLUMN d Int64 DEFAULT 1;")

        # -100 for every bound gives empty ranges (b starts at 1), i.e. the
        # expectation is "nothing deleted, nothing masked".
        self.check_data(all_data, -100, -100, -100, -100)

        self.query(
            "DELETE FROM tab_02473 WHERE a = 0 AND b > {} AND b <= {};".format(
                delete_range_start, delete_range_end
            )
        )

        self.check_data(all_data, delete_range_start, delete_range_end, -100, -100)

        self.query(
            "CREATE ROW POLICY policy_tab_02473 ON tab_02473 FOR SELECT USING b <= {} OR b > {} TO default;".format(
                row_level_policy_range_start, row_level_policy_range_end
            )
        )

        self.check_data(
            all_data,
            delete_range_start,
            delete_range_end,
            row_level_policy_range_start,
            row_level_policy_range_end,
        )

        self.query("DROP POLICY policy_tab_02473 ON tab_02473;")

        self.query("DROP TABLE tab_02473;")
|
2022-10-27 21:20:22 +00:00
|
|
|
|
|
|
|
|
|
|
|
def main():
    """
    Run Tester.run_test for every combination of delete range and
    row-policy range, once per tested index granularity.
    """
    # The server URL comes from the environment; max_threads=1 is appended
    # as an extra URL parameter for every query.
    url = os.environ["CLICKHOUSE_URL"] + "&max_threads=1"

    base_granularity = 10
    total_rows = 7 * base_granularity
    step = base_granularity
    session = requests_session_with_retries()

    # The delete ranges and the row-policy ranges are drawn from the same
    # sequence of (start, end) bounds, so it is built once and reused.
    range_bounds = [
        (range_start, range_end)
        for range_start in range(0, total_rows, 3 * step)
        for range_end in range(range_start + 3 * step, total_rows, 2 * step)
    ]

    for index_granularity in (base_granularity - 1, base_granularity):
        tester = Tester(session, url, index_granularity, total_rows)
        # Test combinations of ranges of various size masked by lightweight DELETES
        # along with ranges of various size masked by row-level policies
        for delete_range_start, delete_range_end in range_bounds:
            for policy_range_start, policy_range_end in range_bounds:
                tester.run_test(
                    delete_range_start,
                    delete_range_end,
                    policy_range_start,
                    policy_range_end,
                )
|
2022-10-27 21:20:22 +00:00
|
|
|
|
|
|
|
|
|
|
|
# Run the test matrix only when this file is executed as a script.
if __name__ == "__main__":
    main()
|